From 771117cafeedffc8a9b5859503fc055bcd525d3e Mon Sep 17 00:00:00 2001
From: Christian Rocha <christian@rocha.is>
Date: Tue, 12 May 2026 09:39:46 -0400
Subject: [PATCH 01/10] perf(chat): cache the prefixed render of chat messages

Avoid rebuilding the focus and selection prefix for every line of every
message on every frame. Each item now keeps the prefixed output cached
and only rebuilds it when something actually changes.

Co-Authored-By: Charm Crush <crush@charm.land>
---
 internal/ui/chat/assistant.go         |  22 ++-
 internal/ui/chat/messages.go          |  44 +++++-
 internal/ui/chat/prefix_cache_test.go | 196 ++++++++++++++++++++++++++
 internal/ui/chat/tools.go             |  24 +++-
 internal/ui/chat/user.go              |  20 ++-
 5 files changed, 302 insertions(+), 4 deletions(-)
 create mode 100644 internal/ui/chat/prefix_cache_test.go

diff --git a/internal/ui/chat/assistant.go b/internal/ui/chat/assistant.go
index f49cd1d6ceca12b15f705640400da6941972342e..2977e7000c2945250bb327c1c79dc152b8c51f92 100644
--- a/internal/ui/chat/assistant.go
+++ b/internal/ui/chat/assistant.go
@@ -114,6 +114,22 @@ func (a *AssistantMessageItem) Render(width int) string {
 	// it's wrapping logic.
 	// We already know that the content is wrapped to the correct width in
 	// RawRender, so we can just apply the styles directly to each line.
+	//
+	// The split + per-line prefix loop is O(L); cache the result keyed
+	// by (width, focused) so steady-state Render becomes a pointer
+	// return. Bypass the cache while spinning (RawRender's spinner
+	// suffix changes every animation frame) or while a highlight range
+	// is active (selection drag).
+	useCache := !a.isSpinning() && !a.isHighlighted()
+	var key uint64
+	if a.focused {
+		key = 1
+	}
+	if useCache {
+		if cached, ok := a.getCachedPrefixedRender(width, key); ok {
+			return cached
+		}
+	}
 	focused := a.sty.Messages.AssistantFocused.Render()
 	blurred := a.sty.Messages.AssistantBlurred.Render()
 	rendered := a.RawRender(width)
@@ -125,7 +141,11 @@ func (a *AssistantMessageItem) Render(width int) string {
 			lines[i] = blurred + line
 		}
 	}
-	return strings.Join(lines, "\n")
+	out := strings.Join(lines, "\n")
+	if useCache {
+		a.setCachedPrefixedRender(out, width, key)
+	}
+	return out
 }
 
 // renderMessageContent renders the message content including thinking, main content, and finish reason.
diff --git a/internal/ui/chat/messages.go b/internal/ui/chat/messages.go
index 6c86cb74951e0bf8f20fe7af4fa420b4527936ca..3c1c5a13b7ba13a5e6990b4cc546c8813c0d5252 100644
--- a/internal/ui/chat/messages.go
+++ b/internal/ui/chat/messages.go
@@ -156,6 +156,20 @@ type cachedMessageItem struct {
 	// width and height are the dimensions of the cached render
 	width  int
 	height int
+
+	// prefixedRendered caches the per-line-prefixed Render output (the
+	// result of splitting RawRender by newlines and prepending a focus
+	// or selection prefix to every line). Items rebuild this every
+	// frame today; caching it keyed by (prefixedWidth, prefixedKey)
+	// turns Render into a pointer return when item state is stable.
+	//
+	// Invalidation lives in clearCache; callers must additionally
+	// bypass this cache whenever the prefixed output would not be
+	// stable (spinner ticks, active highlight ranges) by not calling
+	// setCachedPrefixedRender for those frames.
+	prefixedRendered string
+	prefixedWidth    int
+	prefixedKey      uint64
 }
 
 // getCachedRender returns the cached render if it exists for the given width.
@@ -173,11 +187,31 @@ func (c *cachedMessageItem) setCachedRender(rendered string, width, height int)
 	c.height = height
 }
 
+// getCachedPrefixedRender returns the cached prefixed render if it exists
+// for the given (width, key). The key encodes any state that changes the
+// per-line prefix (focused/blurred, compact, ...).
+func (c *cachedMessageItem) getCachedPrefixedRender(width int, key uint64) (string, bool) {
+	if c.prefixedRendered != "" && c.prefixedWidth == width && c.prefixedKey == key {
+		return c.prefixedRendered, true
+	}
+	return "", false
+}
+
+// setCachedPrefixedRender stores the cached prefixed render.
+func (c *cachedMessageItem) setCachedPrefixedRender(rendered string, width int, key uint64) {
+	c.prefixedRendered = rendered
+	c.prefixedWidth = width
+	c.prefixedKey = key
+}
+
 // clearCache clears the cached render.
 func (c *cachedMessageItem) clearCache() {
 	c.rendered = ""
 	c.width = 0
 	c.height = 0
+	c.prefixedRendered = ""
+	c.prefixedWidth = 0
+	c.prefixedKey = 0
 }
 
 // focusableMessageItem is a base struct for message items that can be focused.
@@ -237,12 +271,20 @@ func (a *AssistantInfoItem) RawRender(width int) string {
 
 // Render implements MessageItem.
 func (a *AssistantInfoItem) Render(width int) string {
+	// AssistantInfoItem uses a single, state-independent prefix; key 0
+	// is sufficient. The cache is invalidated whenever the underlying
+	// cachedMessageItem render is cleared.
+	if cached, ok := a.getCachedPrefixedRender(width, 0); ok {
+		return cached
+	}
 	prefix := a.sty.Messages.SectionHeader.Render()
 	lines := strings.Split(a.RawRender(width), "\n")
 	for i, line := range lines {
 		lines[i] = prefix + line
 	}
-	return strings.Join(lines, "\n")
+	out := strings.Join(lines, "\n")
+	a.setCachedPrefixedRender(out, width, 0)
+	return out
 }
 
 func (a *AssistantInfoItem) renderContent(width int) string {
diff --git a/internal/ui/chat/prefix_cache_test.go b/internal/ui/chat/prefix_cache_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..443f5a68dab73d8e518b37d867fc9aa8c758ca25
--- /dev/null
+++ b/internal/ui/chat/prefix_cache_test.go
@@ -0,0 +1,196 @@
+package chat
+
+import (
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/attachments"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// finishedAssistantMessage builds an assistant message with text content and a
+// finish part so AssistantMessageItem.isSpinning returns false and the
+// prefix cache is exercised.
+func finishedAssistantMessage(id, text string) *message.Message {
+	return &message.Message{
+		ID:   id,
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: text},
+			message.Finish{Reason: message.FinishReasonEndTurn, Time: time.Now().Unix()},
+		},
+	}
+}
+
+// TestAssistantMessageItemRender_PrefixCacheFocusBlur covers the F3 invariant
+// that focus → blur → focus produces the correct prefix every time and never
+// leaks the previous focus state out of the cache.
+func TestAssistantMessageItemRender_PrefixCacheFocusBlur(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := finishedAssistantMessage("m1", "Hello world from the cache test.")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 60
+
+	item.SetFocused(true)
+	focused1 := item.Render(width)
+	focused2 := item.Render(width)
+	require.Equal(t, focused1, focused2, "second render must hit the cache and match the first")
+
+	item.SetFocused(false)
+	blurred1 := item.Render(width)
+	require.NotEqual(t, focused1, blurred1, "blur must produce a different prefixed render than focus")
+
+	item.SetFocused(true)
+	focused3 := item.Render(width)
+	require.Equal(t, focused1, focused3, "re-focus must produce identical output to the original focused render")
+}
+
+// TestAssistantMessageItemRender_PrefixCacheWidthInvalidates asserts that a
+// width change does not return the previous width's cached output.
+func TestAssistantMessageItemRender_PrefixCacheWidthInvalidates(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := finishedAssistantMessage("m2", "Some content that wraps differently at different widths so the rendered output diverges.")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+	item.SetFocused(true)
+
+	narrow := item.Render(40)
+	wide := item.Render(100)
+	require.NotEqual(t, narrow, wide, "different widths must produce different rendered output")
+
+	narrowAgain := item.Render(40)
+	require.Equal(t, narrow, narrowAgain, "returning to the original width must hit (or repopulate) the cache with the same output")
+}
+
+// TestAssistantMessageItemRender_PrefixCacheHighlightOnTop guarantees that
+// activating a highlight range bypasses the prefix cache so selection drags
+// reflect immediately, and that clearing the highlight returns to the cached
+// prefixed output unchanged.
+func TestAssistantMessageItemRender_PrefixCacheHighlightOnTop(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := finishedAssistantMessage("m3", "Hello world from the highlight test.")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+	item.SetFocused(true)
+
+	const width = 60
+	plain := item.Render(width)
+
+	// Activating a highlight must change the rendered output (selection
+	// painted on top) without poisoning the cache for the un-highlighted
+	// state that follows.
+	item.SetHighlight(0, 0, 0, 5)
+	highlighted := item.Render(width)
+	require.NotEqual(t, plain, highlighted, "active highlight must change Render output")
+
+	// Clear the highlight; the cached un-highlighted prefix render must
+	// be returned unchanged.
+	item.SetHighlight(-1, -1, -1, -1)
+	plainAfter := item.Render(width)
+	require.Equal(t, plain, plainAfter, "clearing the highlight must restore the cached prefixed output exactly")
+}
+
+// TestUserMessageItemRender_PrefixCacheFocusBlur is the user-message
+// counterpart of the assistant focus/blur cache test.
+func TestUserMessageItemRender_PrefixCacheFocusBlur(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := &message.Message{
+		ID:   "u1",
+		Role: message.User,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: "Hello from the user."},
+		},
+	}
+	r := attachments.NewRenderer(
+		sty.Attachments.Normal,
+		sty.Attachments.Deleting,
+		sty.Attachments.Image,
+		sty.Attachments.Text,
+	)
+	item := NewUserMessageItem(&sty, msg, r).(*UserMessageItem)
+
+	const width = 60
+
+	item.SetFocused(true)
+	focused1 := item.Render(width)
+	focused2 := item.Render(width)
+	require.Equal(t, focused1, focused2)
+
+	item.SetFocused(false)
+	blurred := item.Render(width)
+	require.NotEqual(t, focused1, blurred)
+
+	item.SetFocused(true)
+	focused3 := item.Render(width)
+	require.Equal(t, focused1, focused3)
+}
+
+// TestCachedMessageItem_PrefixCacheSemantics covers the constant-prefix
+// path used by AssistantInfoItem and the (width, key) keying used by every
+// item, against the underlying cachedMessageItem helper directly. This
+// avoids constructing a full *config.Config with an initialized provider
+// map just to exercise cache plumbing that is identical for all callers.
+func TestCachedMessageItem_PrefixCacheSemantics(t *testing.T) {
+	t.Parallel()
+
+	c := &cachedMessageItem{}
+
+	// Empty cache: miss.
+	_, ok := c.getCachedPrefixedRender(80, 0)
+	require.False(t, ok)
+
+	// Set then hit at the same (width, key).
+	c.setCachedPrefixedRender("hello", 80, 0)
+	got, ok := c.getCachedPrefixedRender(80, 0)
+	require.True(t, ok)
+	require.Equal(t, "hello", got)
+
+	// Different width: miss.
+	_, ok = c.getCachedPrefixedRender(120, 0)
+	require.False(t, ok)
+
+	// Different key (focused vs blurred): miss.
+	_, ok = c.getCachedPrefixedRender(80, 1)
+	require.False(t, ok)
+
+	// clearCache drops the prefixed cache too.
+	c.setCachedRender("raw", 80, 1)
+	c.setCachedPrefixedRender("hello", 80, 0)
+	c.clearCache()
+	_, ok = c.getCachedPrefixedRender(80, 0)
+	require.False(t, ok, "clearCache must drop the prefixed render cache")
+	_, _, ok = c.getCachedRender(80)
+	require.False(t, ok, "clearCache must also drop the raw render cache")
+}
+
+// TestAssistantMessageItemRender_PrefixCacheNoCacheLeak guards against a
+// regression where the cache returned the prefixed output of the previous
+// width. We verify that the cached output for width=W contains the W-sized
+// prefix and not a stale wider one by checking that line lengths are
+// consistent on cache hit.
+func TestAssistantMessageItemRender_PrefixCacheNoCacheLeak(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := finishedAssistantMessage("m4", strings.Repeat("word ", 40))
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+	item.SetFocused(true)
+
+	out80 := item.Render(80)
+	out120 := item.Render(120)
+	require.NotEqual(t, out80, out120)
+
+	// Hit each cached entry again and confirm stability.
+	require.Equal(t, out80, item.Render(80))
+	require.Equal(t, out120, item.Render(120))
+}
diff --git a/internal/ui/chat/tools.go b/internal/ui/chat/tools.go
index 4715cfafa6a25f01b93ee56caf41aa55aee3196e..560fd2fc5312852b5cc91a228b4587f4accb0d2a 100644
--- a/internal/ui/chat/tools.go
+++ b/internal/ui/chat/tools.go
@@ -332,6 +332,24 @@ func (t *baseToolMessageItem) RawRender(width int) string {
 
 // Render renders the tool message item at the given width.
 func (t *baseToolMessageItem) Render(width int) string {
+	// Cache the prefixed output keyed by (width, prefix variant).
+	// Bypass the cache while spinning (RawRender output is
+	// frame-dependent) or while a highlight range is active.
+	useCache := !t.isSpinning() && !t.isHighlighted()
+	var key uint64
+	switch {
+	case t.isCompact:
+		key = 2
+	case t.focused:
+		key = 1
+	default:
+		key = 0
+	}
+	if useCache {
+		if cached, ok := t.getCachedPrefixedRender(width, key); ok {
+			return cached
+		}
+	}
 	var prefix string
 	if t.isCompact {
 		prefix = t.sty.Messages.ToolCallCompact.Render()
@@ -344,7 +362,11 @@ func (t *baseToolMessageItem) Render(width int) string {
 	for i, ln := range lines {
 		lines[i] = prefix + ln
 	}
-	return strings.Join(lines, "\n")
+	out := strings.Join(lines, "\n")
+	if useCache {
+		t.setCachedPrefixedRender(out, width, key)
+	}
+	return out
 }
 
 // ToolCall returns the tool call associated with this message item.
diff --git a/internal/ui/chat/user.go b/internal/ui/chat/user.go
index b1160d2a1531cb6c2915e8df67cb1962079620e0..8194955a6fa99e20a7d949651d6d1400d8f3fb72 100644
--- a/internal/ui/chat/user.go
+++ b/internal/ui/chat/user.go
@@ -70,6 +70,20 @@ func (m *UserMessageItem) RawRender(width int) string {
 
 // Render implements MessageItem.
 func (m *UserMessageItem) Render(width int) string {
+	// Bypass the prefix cache while a highlight range is active so
+	// selection drags reflect immediately without invalidating the
+	// cache. Highlight changes are intentionally applied "above" the
+	// prefix cache.
+	useCache := !m.isHighlighted()
+	var key uint64
+	if m.focused {
+		key = 1
+	}
+	if useCache {
+		if cached, ok := m.getCachedPrefixedRender(width, key); ok {
+			return cached
+		}
+	}
 	var prefix string
 	if m.focused {
 		prefix = m.sty.Messages.UserFocused.Render()
@@ -80,7 +94,11 @@ func (m *UserMessageItem) Render(width int) string {
 	for i, line := range lines {
 		lines[i] = prefix + line
 	}
-	return strings.Join(lines, "\n")
+	out := strings.Join(lines, "\n")
+	if useCache {
+		m.setCachedPrefixedRender(out, width, key)
+	}
+	return out
 }
 
 // ID implements MessageItem.

From 6938dedd6cde0378f428b2a90d1591f69dbce6eb Mon Sep 17 00:00:00 2001
From: Christian Rocha <christian@rocha.is>
Date: Tue, 12 May 2026 10:29:19 -0400
Subject: [PATCH 02/10] perf: batch streaming message updates

Group rapid streaming updates into one save and one notification per short
window instead of one per token. Important updates like finishes, tool
calls, and errors still go through immediately. Cuts database writes and
UI redraws by orders of magnitude during long responses.

Co-Authored-By: Charm Crush <crush@charm.land>
---
 internal/agent/agent.go             |  14 +
 internal/app/app.go                 |  16 +-
 internal/backend/session.go         |   6 +
 internal/message/message.go         | 341 +++++++++++++-
 internal/message/message_test.go    | 695 ++++++++++++++++++++++++++++
 internal/pubsub/broker.go           | 143 +++++-
 internal/workspace/app_workspace.go |   6 +
 7 files changed, 1189 insertions(+), 32 deletions(-)
 create mode 100644 internal/message/message_test.go

diff --git a/internal/agent/agent.go b/internal/agent/agent.go
index 66062270c669ffaba298980ff997c6e2c2e04c2e..ba1215e9a22ca425f745e6e00c85908737d50a0e 100644
--- a/internal/agent/agent.go
+++ b/internal/agent/agent.go
@@ -245,6 +245,15 @@ func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (*fantasy
 
 	defer cancel()
 	defer a.activeRequests.Del(call.SessionID)
+	// Drain any debounced message updates before returning. message.Service
+	// already flushes synchronously on terminal updates, but a defer here
+	// guarantees the contract at every Run exit (success, error, panic
+	// recovery upstream) without callers needing to know.
+	defer func() {
+		if flushErr := a.messages.FlushAll(ctx); flushErr != nil {
+			slog.Error("Failed to flush pending message updates after run", "error", flushErr)
+		}
+	}()
 
 	history, files := a.preparePrompt(msgs, largeModel.CatwalkCfg.SupportsImages, call.Attachments...)
 
@@ -653,6 +662,11 @@ func (a *sessionAgent) Summarize(ctx context.Context, sessionID string, opts fan
 	a.activeRequests.Set(sessionID, cancel)
 	defer a.activeRequests.Del(sessionID)
 	defer cancel()
+	defer func() {
+		if flushErr := a.messages.FlushAll(ctx); flushErr != nil {
+			slog.Error("Failed to flush pending message updates after summarize", "error", flushErr)
+		}
+	}()
 
 	agent := fantasy.NewAgent(largeModel.Model,
 		fantasy.WithSystemPrompt(string(summaryPrompt)),
diff --git a/internal/app/app.go b/internal/app/app.go
index a167ca8638c8497a6d6f4260782ba334c6dbe0c3..a5414246b7819cbafccc452c3b79580564c9b68c 100644
--- a/internal/app/app.go
+++ b/internal/app/app.go
@@ -582,13 +582,23 @@ func (app *App) Shutdown() {
 		app.AgentCoordinator.CancelAll()
 	}
 
-	// Now run remaining cleanup tasks in parallel.
-	var wg sync.WaitGroup
-
 	// Shared shutdown context for all timeout-bounded cleanup.
 	shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
+	// Drain any debounced message updates before the DB-close cleanup
+	// runs in the parallel block below. message.Service buffers
+	// streaming deltas (see internal/message/message.go) and we must
+	// land them while the connection is still open.
+	if app.Messages != nil {
+		if err := app.Messages.FlushAll(shutdownCtx); err != nil {
+			slog.Error("Failed to flush pending message updates on shutdown", "error", err)
+		}
+	}
+
+	// Now run remaining cleanup tasks in parallel.
+	var wg sync.WaitGroup
+
 	// Send exit event
 	wg.Go(func() {
 		event.AppExited()
diff --git a/internal/backend/session.go b/internal/backend/session.go
index 10e21ed8932ccbc990a525785166517cd231595c..9542282319f766e384d9ffe7942e9c235ae63e29 100644
--- a/internal/backend/session.go
+++ b/internal/backend/session.go
@@ -72,6 +72,12 @@ func (b *Backend) ListSessionMessages(ctx context.Context, workspaceID, sessionI
 		return nil, err
 	}
 
+	// Drain debounced updates so HTTP clients (and the TUI on session
+	// switch) observe the latest in-memory state rather than racing the
+	// debounce timer in message.Service.
+	if err := ws.Messages.FlushAll(ctx); err != nil {
+		return nil, err
+	}
 	return ws.Messages.List(ctx, sessionID)
 }
 
diff --git a/internal/message/message.go b/internal/message/message.go
index 6da8827b72227602dc36c39b6a2254aba18d2b0d..9e8d258c543d9f9aed31518b848439456b5e4336 100644
--- a/internal/message/message.go
+++ b/internal/message/message.go
@@ -5,6 +5,7 @@ import (
 	"database/sql"
 	"encoding/json"
 	"fmt"
+	"sync"
 	"time"
 
 	"github.com/charmbracelet/crush/internal/db"
@@ -12,6 +13,13 @@ import (
 	"github.com/google/uuid"
 )
 
+// defaultUpdateDebounce is the default debounce window for [Service.Update].
+// Streaming deltas that arrive within the window are coalesced into a
+// single SQL write and a single pubsub event. Terminal updates
+// (finish/error/cancel/tool-call structural changes) bypass the
+// debounce and flush synchronously.
+const defaultUpdateDebounce = 33 * time.Millisecond
+
 type CreateMessageParams struct {
 	Role             MessageRole
 	Parts            []ContentPart
@@ -20,6 +28,21 @@ type CreateMessageParams struct {
 	IsSummaryMessage bool
 }
 
+// Service is the public interface to the message store.
+//
+// [Service.Update] is eventually consistent: it accepts new state into
+// an in-memory buffer and writes it to SQLite plus publishes a
+// [pubsub.UpdatedEvent] on the next debounce tick (default
+// [defaultUpdateDebounce]) or on the next terminal-state update,
+// whichever comes first. Terminal-state updates — those that finish
+// the message, add or finish a tool call, or end a reasoning section —
+// flush synchronously before [Service.Update] returns.
+//
+// Callers that need stronger ordering (e.g. tests, shutdown,
+// session-switch reads) must use [Service.Flush] or [Service.FlushAll]
+// before reading via [Service.Get] / [Service.List]. Without an
+// explicit flush, a read can race the debounce timer and miss the
+// most recent in-memory state.
 type Service interface {
 	pubsub.Subscriber[Message]
 	Create(ctx context.Context, sessionID string, params CreateMessageParams) (Message, error)
@@ -30,20 +53,87 @@ type Service interface {
 	ListAllUserMessages(ctx context.Context) ([]Message, error)
 	Delete(ctx context.Context, id string) error
 	DeleteSessionMessages(ctx context.Context, sessionID string) error
+
+	// Flush synchronously drains any pending debounced state for the
+	// given message ID, performs the SQL write, and publishes the
+	// resulting [pubsub.UpdatedEvent]. Idempotent; cheap no-op if no
+	// updates are pending. Use this before any read that must observe
+	// the latest [Service.Update].
+	Flush(ctx context.Context, id string) error
+
+	// FlushAll synchronously drains pending debounced state for every
+	// message known to the service. Intended for shutdown and
+	// session-switch paths.
+	FlushAll(ctx context.Context) error
+}
+
+// pendingState holds the in-memory coalescing buffer for a single
+// message ID. All fields except where noted are guarded by
+// service.mu. The flushing flag serializes concurrent flushers for
+// the same ID so SQL writes never reorder.
+type pendingState struct {
+	// latest is the most recent [Message] passed to [Service.Update]
+	// that has not yet been flushed.
+	latest Message
+
+	// dirty is true when latest contains state that has not been
+	// written to SQL since the last successful flush.
+	dirty bool
+
+	// flushing is true while a goroutine is performing the SQL write
+	// for this ID. New updates are still accepted (and re-mark dirty)
+	// but other flushers must back off.
+	flushing bool
+
+	// timer is the active debounce timer, or nil if no flush is
+	// scheduled. Stopped and reset when a terminal update preempts
+	// the debounce window.
+	timer *time.Timer
+
+	// lastFlushed is the snapshot most recently written to SQL. Used
+	// as the baseline for terminal-state detection.
+	lastFlushed Message
+
+	// hasFlushed is false until the first successful write for this
+	// ID; until then lastFlushed is the zero value and must not be
+	// treated as a real prior state.
+	hasFlushed bool
 }
 
 type service struct {
 	*pubsub.Broker[Message]
-	q db.Querier
+	q        db.Querier
+	debounce time.Duration
+
+	mu      sync.Mutex
+	pending map[string]*pendingState
 }
 
-func NewService(q db.Querier) Service {
-	return &service{
-		Broker: pubsub.NewBroker[Message](),
-		q:      q,
+// ServiceOption configures a [Service] at construction.
+type ServiceOption func(*service)
+
+// WithDebounce overrides the debounce window for [Service.Update]. A
+// zero or negative value disables debouncing entirely (every update
+// flushes synchronously). Intended primarily for tests.
+func WithDebounce(d time.Duration) ServiceOption {
+	return func(s *service) {
+		s.debounce = d
 	}
 }
 
+func NewService(q db.Querier, opts ...ServiceOption) Service {
+	s := &service{
+		Broker:   pubsub.NewBroker[Message](),
+		q:        q,
+		debounce: defaultUpdateDebounce,
+		pending:  make(map[string]*pendingState),
+	}
+	for _, opt := range opts {
+		opt(s)
+	}
+	return s
+}
+
 func (s *service) Delete(ctx context.Context, id string) error {
 	message, err := s.Get(ctx, id)
 	if err != nil {
@@ -53,6 +143,16 @@ func (s *service) Delete(ctx context.Context, id string) error {
 	if err != nil {
 		return err
 	}
+	// Drop any pending coalesced state for this ID. We never want to
+	// flush back over a deleted row.
+	s.mu.Lock()
+	if p, ok := s.pending[id]; ok {
+		if p.timer != nil {
+			p.timer.Stop()
+		}
+		delete(s.pending, id)
+	}
+	s.mu.Unlock()
 	// Clone the message before publishing to avoid race conditions with
 	// concurrent modifications to the Parts slice.
 	s.Publish(pubsub.DeletedEvent, message.Clone())
@@ -111,31 +211,240 @@ func (s *service) DeleteSessionMessages(ctx context.Context, sessionID string) e
 	return nil
 }
 
-func (s *service) Update(ctx context.Context, message Message) error {
-	parts, err := marshalParts(message.Parts)
+// Update accepts a new state for a message and either flushes
+// synchronously (terminal updates, debounce <= 0) or buffers it until
+// the next debounce tick. See [Service] for the contract.
+func (s *service) Update(ctx context.Context, msg Message) error {
+	cloned := msg.Clone()
+
+	// Zero or negative debounce: flush every update synchronously. This
+	// preserves the pre-coalescing behaviour for tests and any caller
+	// that explicitly opted out via [WithDebounce].
+	if s.debounce <= 0 {
+		s.mu.Lock()
+		p, ok := s.pending[msg.ID]
+		if !ok {
+			p = &pendingState{}
+			s.pending[msg.ID] = p
+		}
+		p.latest = cloned
+		p.dirty = true
+		s.mu.Unlock()
+		return s.flushOne(ctx, msg.ID, true)
+	}
+
+	s.mu.Lock()
+	p, ok := s.pending[msg.ID]
+	if !ok {
+		p = &pendingState{}
+		s.pending[msg.ID] = p
+	}
+	p.latest = cloned
+	p.dirty = true
+
+	var prev *Message
+	if p.hasFlushed {
+		prev = &p.lastFlushed
+	}
+	terminal := shouldFlushNow(prev, &cloned)
+
+	if terminal {
+		if p.timer != nil {
+			p.timer.Stop()
+			p.timer = nil
+		}
+		s.mu.Unlock()
+		return s.flushOne(ctx, msg.ID, true)
+	}
+
+	// Debounce: schedule a single flush per pending state. If a flush
+	// is already running we let it finish; the trailing dirty bit will
+	// be picked up by the next Update or by Flush.
+	if p.timer == nil && !p.flushing {
+		id := msg.ID
+		p.timer = time.AfterFunc(s.debounce, func() {
+			// Detached from caller ctx so a cancelled stream context
+			// does not strand the buffered write.
+			_ = s.flushOne(context.Background(), id, false)
+		})
+	}
+	s.mu.Unlock()
+	return nil
+}
+
+// Flush implements [Service.Flush].
+func (s *service) Flush(ctx context.Context, id string) error {
+	return s.flushOne(ctx, id, true)
+}
+
+// FlushAll implements [Service.FlushAll]. It snapshots every ID with
+// outstanding work — either dirty buffered state or a flush already in
+// flight — then drains each one. Picking up in-flight IDs ensures
+// FlushAll cannot return while a timer-fired write is still mid-SQL,
+// which is what shutdown and session-switch callers rely on.
+func (s *service) FlushAll(ctx context.Context) error {
+	s.mu.Lock()
+	ids := make([]string, 0, len(s.pending))
+	for id, p := range s.pending {
+		if p.dirty || p.flushing {
+			ids = append(ids, id)
+		}
+	}
+	s.mu.Unlock()
+	var firstErr error
+	for _, id := range ids {
+		if err := s.flushOne(ctx, id, true); err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	return firstErr
+}
+
+// flushOne drains a single message ID. When syncCaller is true the
+// caller is willing to wait through a concurrent in-flight flush so
+// that, on return, lastFlushed equals latest at the moment of return.
+// When false (timer-fired path) we bail if another flusher is already
+// running; that flusher will pick up the trailing dirty bit.
+//
+// Order matters: a sync caller must wait for any in-flight flush to
+// drain even when the buffer is currently clean — that in-flight
+// write has not yet updated the SQL row, so returning early would
+// violate the contract that on success lastFlushed reflects the most
+// recent state.
+func (s *service) flushOne(ctx context.Context, id string, syncCaller bool) error {
+	for {
+		s.mu.Lock()
+		p, ok := s.pending[id]
+		if !ok {
+			s.mu.Unlock()
+			return nil
+		}
+		if p.flushing {
+			if !syncCaller {
+				s.mu.Unlock()
+				return nil
+			}
+			s.mu.Unlock()
+			// Brief yield; in-flight write should land in <1ms typical.
+			time.Sleep(time.Millisecond)
+			continue
+		}
+		if !p.dirty {
+			s.mu.Unlock()
+			return nil
+		}
+
+		if p.timer != nil {
+			p.timer.Stop()
+			p.timer = nil
+		}
+		snap := p.latest
+		// Decide whether this snapshot represents a terminal event
+		// against the prior baseline. We must do this before resetting
+		// dirty/flushing because shouldFlushNow looks at p.lastFlushed
+		// (which is what was on disk before this write).
+		var prev *Message
+		if p.hasFlushed {
+			prev = &p.lastFlushed
+		}
+		isTerminal := shouldFlushNow(prev, &snap)
+		p.flushing = true
+		p.dirty = false
+		s.mu.Unlock()
+
+		err := s.write(ctx, snap)
+
+		s.mu.Lock()
+		p.flushing = false
+		if err == nil {
+			p.lastFlushed = snap
+			p.hasFlushed = true
+		} else {
+			// Restore dirty so the next caller retries.
+			p.dirty = true
+		}
+		// If a delta arrived during the SQL write and we are a sync
+		// caller, the user expects that delta to land too.
+		wasDirty := p.dirty
+		s.mu.Unlock()
+
+		if err != nil {
+			return err
+		}
+
+		// Terminal events — message finished, tool call added or
+		// finished, reasoning ended — use the bounded must-deliver
+		// path so they never get dropped under channel contention.
+		if isTerminal {
+			s.PublishMustDeliver(ctx, pubsub.UpdatedEvent, snap)
+		} else {
+			s.Publish(pubsub.UpdatedEvent, snap)
+		}
+
+		if wasDirty && syncCaller {
+			continue
+		}
+		return nil
+	}
+}
+
+// write performs the unguarded SQL write + UpdatedAt stamp. Caller
+// owns publishing.
+func (s *service) write(ctx context.Context, msg Message) error {
+	parts, err := marshalParts(msg.Parts)
 	if err != nil {
 		return err
 	}
 	finishedAt := sql.NullInt64{}
-	if f := message.FinishPart(); f != nil {
+	if f := msg.FinishPart(); f != nil {
 		finishedAt.Int64 = f.Time
 		finishedAt.Valid = true
 	}
-	err = s.q.UpdateMessage(ctx, db.UpdateMessageParams{
-		ID:         message.ID,
+	if err := s.q.UpdateMessage(ctx, db.UpdateMessageParams{
+		ID:         msg.ID,
 		Parts:      string(parts),
 		FinishedAt: finishedAt,
-	})
-	if err != nil {
+	}); err != nil {
 		return err
 	}
-	message.UpdatedAt = time.Now().Unix()
-	// Clone the message before publishing to avoid race conditions with
-	// concurrent modifications to the Parts slice.
-	s.Publish(pubsub.UpdatedEvent, message.Clone())
 	return nil
 }
 
+// shouldFlushNow returns true when next represents a structural
+// change that must not be silently coalesced: the message just
+// finished, the tool-call set grew, a tool call transitioned to
+// finished, or reasoning just finished. prev is the last-flushed
+// snapshot (or nil if no write has landed yet).
+func shouldFlushNow(prev, next *Message) bool {
+	if next.IsFinished() {
+		return true
+	}
+
+	var prevCalls []ToolCall
+	var prevReasoningFinishedAt int64
+	if prev != nil {
+		prevCalls = prev.ToolCalls()
+		prevReasoningFinishedAt = prev.ReasoningContent().FinishedAt
+	}
+	nextCalls := next.ToolCalls()
+	if len(nextCalls) != len(prevCalls) {
+		return true
+	}
+	for i := range nextCalls {
+		// Bounds-safe: lengths are equal here.
+		if nextCalls[i].Finished != prevCalls[i].Finished {
+			return true
+		}
+		// A tool call's input only matters once it has landed (Finished
+		// flips true). Earlier deltas to Input are debounced with the
+		// rest of the streaming state.
+	}
+	if next.ReasoningContent().FinishedAt > 0 && prevReasoningFinishedAt == 0 {
+		return true
+	}
+	return false
+}
+
 func (s *service) Get(ctx context.Context, id string) (Message, error) {
 	dbMessage, err := s.q.GetMessage(ctx, id)
 	if err != nil {
diff --git a/internal/message/message_test.go b/internal/message/message_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..0a046337542606ae10aad16ef8cb36d1eb879c03
--- /dev/null
+++ b/internal/message/message_test.go
@@ -0,0 +1,695 @@
+package message
+
+import (
+	"context"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/charmbracelet/crush/internal/db"
+	"github.com/charmbracelet/crush/internal/pubsub"
+	"github.com/charmbracelet/crush/internal/session"
+	"github.com/stretchr/testify/require"
+)
+
+// slowUpdateQuerier wraps a [db.Querier] and forces UpdateMessage to
+// hang on a release channel. Used to simulate an in-flight SQL write.
+type slowUpdateQuerier struct {
+	db.Querier
+	release   chan struct{}
+	started   chan struct{}
+	startOnce sync.Once
+}
+
+func (s *slowUpdateQuerier) UpdateMessage(ctx context.Context, arg db.UpdateMessageParams) error {
+	s.startOnce.Do(func() { close(s.started) })
+	select {
+	case <-s.release:
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+	return s.Querier.UpdateMessage(ctx, arg)
+}
+
+// newTestService spins up a fresh in-memory message.Service backed by a
+// temporary on-disk SQLite database. Returns the service plus a session
+// ID to attach messages to.
+func newTestService(t *testing.T, opts ...ServiceOption) (Service, string) {
+	t.Helper()
+	conn, err := db.Connect(t.Context(), t.TempDir())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = conn.Close() })
+
+	q := db.New(conn)
+	sessions := session.NewService(q, conn)
+	sess, err := sessions.Create(t.Context(), "test")
+	require.NoError(t, err)
+
+	svc := NewService(q, opts...)
+	return svc, sess.ID
+}
+
+// eventCollector consumes broker events into a slice in a goroutine
+// and exposes thread-safe Snapshot / Reset helpers for assertions.
+type eventCollector struct {
+	mu     sync.Mutex
+	events []pubsub.Event[Message]
+}
+
+func collect(ctx context.Context, sub <-chan pubsub.Event[Message]) *eventCollector {
+	c := &eventCollector{}
+	go func() {
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case ev, ok := <-sub:
+				if !ok {
+					return
+				}
+				c.mu.Lock()
+				c.events = append(c.events, ev)
+				c.mu.Unlock()
+			}
+		}
+	}()
+	return c
+}
+
+func (c *eventCollector) snapshot() []pubsub.Event[Message] {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	out := make([]pubsub.Event[Message], len(c.events))
+	copy(out, c.events)
+	return out
+}
+
+func (c *eventCollector) reset() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.events = nil
+}
+
+func TestUpdate_DebouncesTextDeltas(t *testing.T) {
+	t.Parallel()
+
+	// Long-enough debounce that we can verify nothing flushes prematurely.
+	svc, sessionID := newTestService(t, WithDebounce(50*time.Millisecond))
+
+	subCtx, cancelSub := context.WithCancel(t.Context())
+	defer cancelSub()
+	sub := svc.Subscribe(subCtx)
+	collector := collect(subCtx, sub)
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{
+		Role: Assistant,
+	})
+	require.NoError(t, err)
+	// Drop the CreatedEvent emitted by Create.
+	time.Sleep(5 * time.Millisecond)
+	collector.reset()
+
+	// Push 5 deltas inside a single debounce window.
+	for i := 0; i < 5; i++ {
+		msg.AppendContent("a")
+		require.NoError(t, svc.Update(t.Context(), msg))
+	}
+
+	// Before the debounce expires no UpdatedEvent should have landed.
+	time.Sleep(10 * time.Millisecond)
+	require.Empty(t, collector.snapshot(), "no events should land before debounce window expires")
+
+	// Wait for the debounce timer to fire.
+	require.Eventually(t, func() bool {
+		return len(collector.snapshot()) >= 1
+	}, time.Second, 5*time.Millisecond)
+	events := collector.snapshot()
+	require.Len(t, events, 1, "5 deltas should coalesce into 1 UpdatedEvent")
+	require.Equal(t, pubsub.UpdatedEvent, events[0].Type)
+	require.Equal(t, "aaaaa", events[0].Payload.Content().Text)
+
+	// Final state must be persisted.
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Equal(t, "aaaaa", got.Content().Text)
+}
+
+func TestUpdate_TerminalUpdatesFlushSynchronously(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	subCtx, cancelSub := context.WithCancel(t.Context())
+	defer cancelSub()
+	sub := svc.Subscribe(subCtx)
+	collector := collect(subCtx, sub)
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	time.Sleep(5 * time.Millisecond)
+	collector.reset()
+
+	// AddFinish makes the message terminal; Update must flush
+	// synchronously even with a 1-hour debounce.
+	msg.AppendContent("done")
+	msg.AddFinish(FinishReasonEndTurn, "", "")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	require.Eventually(t, func() bool {
+		return len(collector.snapshot()) >= 1
+	}, time.Second, 5*time.Millisecond,
+		"terminal update must publish without waiting for debounce")
+	events := collector.snapshot()
+	require.Len(t, events, 1)
+	require.True(t, events[0].Payload.IsFinished())
+
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.True(t, got.IsFinished())
+}
+
+func TestUpdate_ToolCallStructuralChangeFlushes(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+
+	// Adding a new tool call is a structural change → sync flush.
+	msg.AddToolCall(ToolCall{ID: "tc1", Name: "view", Finished: false})
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Len(t, got.ToolCalls(), 1)
+	require.Equal(t, "tc1", got.ToolCalls()[0].ID)
+
+	// Marking the tool call finished is also a structural change.
+	msg.AddToolCall(ToolCall{ID: "tc1", Name: "view", Input: "{}", Finished: true})
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	got, err = svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.True(t, got.ToolCalls()[0].Finished)
+}
+
+func TestUpdate_ReasoningEndFlushes(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+
+	// Reasoning deltas alone debounce.
+	msg.AppendReasoningContent("hmm")
+	require.NoError(t, svc.Update(t.Context(), msg))
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Empty(t, got.ReasoningContent().Thinking, "reasoning delta should still be in the debounce buffer")
+
+	// FinishThinking sets FinishedAt → terminal flush.
+	msg.FinishThinking()
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	got, err = svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Equal(t, "hmm", got.ReasoningContent().Thinking)
+	require.NotZero(t, got.ReasoningContent().FinishedAt)
+}
+
+func TestFlush_DrainsPendingDebouncedUpdates(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	msg.AppendContent("buffered")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	// Without a flush the SQL row is unchanged from Create.
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Empty(t, got.Content().Text)
+
+	require.NoError(t, svc.Flush(t.Context(), msg.ID))
+
+	got, err = svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Equal(t, "buffered", got.Content().Text)
+
+	// Subsequent Flush is a no-op.
+	require.NoError(t, svc.Flush(t.Context(), msg.ID))
+}
+
+func TestFlushAll_DrainsAllPending(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	const n = 5
+	msgs := make([]Message, n)
+	for i := range msgs {
+		m, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+		require.NoError(t, err)
+		m.AppendContent("hi")
+		require.NoError(t, svc.Update(t.Context(), m))
+		msgs[i] = m
+	}
+
+	require.NoError(t, svc.FlushAll(t.Context()))
+
+	for _, m := range msgs {
+		got, err := svc.Get(t.Context(), m.ID)
+		require.NoError(t, err)
+		require.Equal(t, "hi", got.Content().Text, "FlushAll should drain every pending message")
+	}
+}
+
+func TestUpdate_OrderingMatchesNonCoalesced(t *testing.T) {
+	t.Parallel()
+
+	// Compare the final state after coalesced vs zero-debounce updates.
+	// A sequence of interleaved text/reasoning/tool-call updates must
+	// converge to the same final DB row either way.
+	build := func(svc Service, sessionID string) Message {
+		msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+		require.NoError(t, err)
+		msg.AppendReasoningContent("thinking 1 ")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AppendReasoningContent("thinking 2")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.FinishThinking()
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AppendContent("hello ")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AppendContent("world")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AddToolCall(ToolCall{ID: "tc", Name: "x", Finished: false})
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AddToolCall(ToolCall{ID: "tc", Name: "x", Input: "{}", Finished: true})
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AddFinish(FinishReasonEndTurn, "", "")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		return msg
+	}
+
+	coalesced, sid1 := newTestService(t, WithDebounce(20*time.Millisecond))
+	a := build(coalesced, sid1)
+	require.NoError(t, coalesced.FlushAll(t.Context()))
+	gotA, err := coalesced.Get(t.Context(), a.ID)
+	require.NoError(t, err)
+
+	immediate, sid2 := newTestService(t, WithDebounce(0))
+	b := build(immediate, sid2)
+	gotB, err := immediate.Get(t.Context(), b.ID)
+	require.NoError(t, err)
+
+	require.Equal(t, gotA.Content().Text, gotB.Content().Text)
+	require.Equal(t, gotA.ReasoningContent().Thinking, gotB.ReasoningContent().Thinking)
+	require.Equal(t, len(gotA.ToolCalls()), len(gotB.ToolCalls()))
+	require.Equal(t, gotA.IsFinished(), gotB.IsFinished())
+}
+
+func TestDelete_DropsPendingState(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	msg.AppendContent("dropped")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	require.NoError(t, svc.Delete(t.Context(), msg.ID))
+
+	// FlushAll after Delete must not write to the deleted row.
+	require.NoError(t, svc.FlushAll(t.Context()))
+
+	_, err = svc.Get(t.Context(), msg.ID)
+	require.Error(t, err, "deleted message must remain deleted")
+}
+
+func TestBroker_PublishLossyDropCounter(t *testing.T) {
+	t.Parallel()
+
+	// Tiny channel buffer so we can saturate from a single sender.
+	b := pubsub.NewBrokerWithOptions[int](1, 1000)
+	defer b.Shutdown()
+
+	subCtx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+	sub := b.Subscribe(subCtx)
+	require.NotNil(t, sub)
+
+	// Don't read from sub. Saturate the buffer.
+	for range 100 {
+		b.Publish(pubsub.UpdatedEvent, 1)
+	}
+
+	require.GreaterOrEqual(t, b.DropCount(), uint64(1),
+		"lossy Publish must increment the drop counter under contention")
+}
+
+func TestBroker_PublishMustDeliverHonorsTimeout(t *testing.T) {
+	t.Parallel()
+
+	b := pubsub.NewBrokerWithOptions[int](1, 1000)
+	b.SetMustDeliverTimeout(20 * time.Millisecond)
+	defer b.Shutdown()
+
+	subCtx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+	sub := b.Subscribe(subCtx)
+	require.NotNil(t, sub)
+
+	// Saturate: one event sits in the buffer, the second must wait.
+	b.Publish(pubsub.UpdatedEvent, 1)
+
+	// PublishMustDeliver should block up to 20ms then drop.
+	start := time.Now()
+	b.PublishMustDeliver(t.Context(), pubsub.UpdatedEvent, 2)
+	elapsed := time.Since(start)
+
+	require.GreaterOrEqual(t, elapsed, 20*time.Millisecond,
+		"PublishMustDeliver should block at least the timeout under contention")
+	require.Less(t, elapsed, 200*time.Millisecond,
+		"PublishMustDeliver must not block indefinitely")
+	require.GreaterOrEqual(t, b.MustDeliverDropCount(), uint64(1),
+		"timeout must increment the must-deliver drop counter")
+}
+
+func TestBroker_PublishMustDeliverWithReader(t *testing.T) {
+	t.Parallel()
+
+	b := pubsub.NewBrokerWithOptions[int](1, 1000)
+	b.SetMustDeliverTimeout(50 * time.Millisecond)
+	defer b.Shutdown()
+
+	subCtx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+	sub := b.Subscribe(subCtx)
+
+	var received atomic.Uint64
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		for {
+			select {
+			case <-subCtx.Done():
+				return
+			case _, ok := <-sub:
+				if !ok {
+					return
+				}
+				received.Add(1)
+			}
+		}
+	}()
+
+	for i := range 10 {
+		b.PublishMustDeliver(t.Context(), pubsub.UpdatedEvent, i)
+	}
+
+	// All 10 should land within the must-deliver timeout window.
+	require.Eventually(t, func() bool { return received.Load() == 10 },
+		time.Second, 5*time.Millisecond,
+		"all must-deliver events should reach an active subscriber")
+	require.Zero(t, b.MustDeliverDropCount(),
+		"no drops expected when subscriber drains promptly")
+}
+
+func TestUpdate_TerminalEventUsesMustDeliver(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	subCtx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+	sub := svc.Subscribe(subCtx)
+
+	var seenFinish atomic.Bool
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		for {
+			select {
+			case <-subCtx.Done():
+				return
+			case ev, ok := <-sub:
+				if !ok {
+					return
+				}
+				if ev.Type == pubsub.UpdatedEvent && ev.Payload.IsFinished() {
+					seenFinish.Store(true)
+				}
+			}
+		}
+	}()
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	msg.AppendContent("final")
+	msg.AddFinish(FinishReasonEndTurn, "", "")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	require.Eventually(t, func() bool { return seenFinish.Load() },
+		time.Second, 10*time.Millisecond,
+		"terminal update must reach subscribers via the must-deliver path")
+}
+
+func TestUpdate_ZeroDebounceFlushesEveryUpdate(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(0))
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+
+	for i := 0; i < 3; i++ {
+		msg.AppendContent("x")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		got, err := svc.Get(t.Context(), msg.ID)
+		require.NoError(t, err)
+		require.Len(t, got.Content().Text, i+1, "every update must land synchronously when debounce is 0")
+	}
+}
+
+// TestFlush_WaitsForInFlightWrite reproduces the failure where Flush
+// or FlushAll could return before a concurrent in-flight SQL write
+// completed. We block UpdateMessage on a release channel, fire the
+// debounce timer, then call Flush and assert it does not return until
+// the in-flight write actually lands.
+func TestFlush_WaitsForInFlightWrite(t *testing.T) {
+	t.Parallel()
+
+	conn, err := db.Connect(t.Context(), t.TempDir())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = conn.Close() })
+
+	q := db.New(conn)
+	sessions := session.NewService(q, conn)
+	sess, err := sessions.Create(t.Context(), "test")
+	require.NoError(t, err)
+
+	slow := &slowUpdateQuerier{
+		Querier: q,
+		release: make(chan struct{}),
+		started: make(chan struct{}),
+	}
+	// Short debounce so the timer fires quickly.
+	svc := NewService(slow, WithDebounce(10*time.Millisecond))
+
+	msg, err := svc.Create(t.Context(), sess.ID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	msg.AppendContent("payload")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	// Wait for the timer-fired flush to enter UpdateMessage.
+	select {
+	case <-slow.started:
+	case <-time.After(time.Second):
+		t.Fatal("timer-fired flush never reached UpdateMessage")
+	}
+
+	// At this point the buffer is dirty=false but flushing=true. A
+	// naive Flush would early-return on !dirty. Spawn Flush in a
+	// goroutine and assert it has not returned while the write is
+	// still blocked.
+	flushDone := make(chan error, 1)
+	go func() { flushDone <- svc.Flush(t.Context(), msg.ID) }()
+
+	select {
+	case err := <-flushDone:
+		t.Fatalf("Flush returned %v while in-flight write was still blocked", err)
+	case <-time.After(50 * time.Millisecond):
+		// Expected: Flush is correctly waiting.
+	}
+
+	// Release the slow write; Flush must now return cleanly.
+	close(slow.release)
+	select {
+	case err := <-flushDone:
+		require.NoError(t, err)
+	case <-time.After(time.Second):
+		t.Fatal("Flush did not return after in-flight write completed")
+	}
+
+	// The SQL row should now reflect the buffered payload.
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Equal(t, "payload", got.Content().Text)
+}
+
+// TestFlushAll_WaitsForInFlightWrite asserts FlushAll picks up IDs
+// whose buffer is currently flushing (dirty=false) so shutdown and
+// session-switch callers don't return while an SQL write is mid-flight.
+func TestFlushAll_WaitsForInFlightWrite(t *testing.T) {
+	t.Parallel()
+
+	conn, err := db.Connect(t.Context(), t.TempDir())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = conn.Close() })
+
+	q := db.New(conn)
+	sessions := session.NewService(q, conn)
+	sess, err := sessions.Create(t.Context(), "test")
+	require.NoError(t, err)
+
+	slow := &slowUpdateQuerier{
+		Querier: q,
+		release: make(chan struct{}),
+		started: make(chan struct{}),
+	}
+	svc := NewService(slow, WithDebounce(10*time.Millisecond))
+
+	msg, err := svc.Create(t.Context(), sess.ID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	msg.AppendContent("payload")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	select {
+	case <-slow.started:
+	case <-time.After(time.Second):
+		t.Fatal("timer-fired flush never reached UpdateMessage")
+	}
+
+	flushDone := make(chan error, 1)
+	go func() { flushDone <- svc.FlushAll(t.Context()) }()
+
+	select {
+	case err := <-flushDone:
+		t.Fatalf("FlushAll returned %v while in-flight write was still blocked", err)
+	case <-time.After(50 * time.Millisecond):
+	}
+
+	close(slow.release)
+	select {
+	case err := <-flushDone:
+		require.NoError(t, err)
+	case <-time.After(time.Second):
+		t.Fatal("FlushAll did not return after in-flight write completed")
+	}
+
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Equal(t, "payload", got.Content().Text)
+}
+
+// TestUpdate_StructuralFlushUsesMustDeliver covers the second review
+// finding: structural terminal events (tool-call add, tool-call
+// finish, reasoning end) must publish via the must-deliver path even
+// when the message itself is not yet IsFinished.
+//
+// We detect which path was taken by saturating a subscriber's channel
+// buffer with no reader. With a short must-deliver timeout, the
+// must-deliver path increments [pubsub.Broker.MustDeliverDropCount]
+// after the timeout expires; the lossy path increments
+// [pubsub.Broker.DropCount] immediately. The two counters are
+// disjoint, so they precisely identify which call site published the
+// event.
+func TestUpdate_StructuralFlushUsesMustDeliver(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name string
+		mut  func(*Message)
+	}{
+		{
+			name: "tool call add",
+			mut: func(m *Message) {
+				m.AddToolCall(ToolCall{ID: "tc1", Name: "view"})
+			},
+		},
+		{
+			name: "tool call finish",
+			mut: func(m *Message) {
+				m.AddToolCall(ToolCall{ID: "tc1", Name: "view", Input: "{}", Finished: true})
+			},
+		},
+		{
+			name: "reasoning end",
+			mut: func(m *Message) {
+				m.AppendReasoningContent("hmm")
+				m.FinishThinking()
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			conn, err := db.Connect(t.Context(), t.TempDir())
+			require.NoError(t, err)
+			t.Cleanup(func() { _ = conn.Close() })
+
+			q := db.New(conn)
+			sessions := session.NewService(q, conn)
+			sess, err := sessions.Create(t.Context(), "test")
+			require.NoError(t, err)
+
+			// Replace the default broker with a tiny buffer + short
+			// must-deliver timeout so we can fully saturate from a
+			// single sender and observe drops without long waits.
+			svc := NewService(q, WithDebounce(time.Hour))
+			impl := svc.(*service)
+			impl.Shutdown()
+			impl.Broker = pubsub.NewBrokerWithOptions[Message](1, 1000)
+			impl.SetMustDeliverTimeout(40 * time.Millisecond)
+
+			subCtx, cancel := context.WithCancel(t.Context())
+			defer cancel()
+			sub := svc.Subscribe(subCtx)
+
+			msg, err := svc.Create(t.Context(), sess.ID, CreateMessageParams{Role: Assistant})
+			require.NoError(t, err)
+
+			// Saturate the subscriber's buffer (capacity 1). The
+			// CreatedEvent from Create above already left one event
+			// in the buffer; we never read sub, so the next publish
+			// has nowhere to go.
+			_ = sub // intentionally not drained.
+
+			// Drive the structural change. With debounce=1h, Update
+			// flushes synchronously and routes through whichever
+			// publish path the service chose for structural events.
+			tc.mut(&msg)
+			require.NoError(t, svc.Update(t.Context(), msg))
+
+			// Must-deliver timeout (40ms) should have expired with
+			// no drain. If structural events are routed through
+			// must-deliver: MustDeliverDropCount > 0, DropCount
+			// unchanged. If routed through lossy Publish:
+			// DropCount > 0, MustDeliverDropCount == 0.
+			require.Eventually(t, func() bool {
+				return impl.MustDeliverDropCount() >= 1
+			}, time.Second, 5*time.Millisecond,
+				"structural terminal event should publish via must-deliver, not lossy Publish")
+			require.Zero(t, impl.DropCount(),
+				"structural terminal event must not be silently dropped via lossy Publish")
+		})
+	}
+}
diff --git a/internal/pubsub/broker.go b/internal/pubsub/broker.go
index 52e827c2d6306fa2365372b9867810d9b99d0227..03af2e676f67bee68a757eb85a600d7b09838c4a 100644
--- a/internal/pubsub/broker.go
+++ b/internal/pubsub/broker.go
@@ -1,19 +1,54 @@
+// Package pubsub provides a lightweight in-process broker for fan-out
+// event delivery between services and the UI.
+//
+// Delivery semantics:
+//
+//   - [Broker.Publish] is best-effort and lossy under contention. If a
+//     subscriber's channel is full, the event is dropped for that
+//     subscriber and a counter is incremented. This is the right choice
+//     for high-frequency intermediate updates (e.g. streaming token
+//     deltas) where only the latest state matters.
+//
+//   - [Broker.PublishMustDeliver] is bounded-blocking. For each
+//     subscriber it first tries a non-blocking send, then falls back to
+//     a per-subscriber blocking send with a hard timeout. On timeout the
+//     event is dropped for that subscriber, an error is logged, and the
+//     must-deliver drop counter is incremented. The publisher never
+//     blocks indefinitely. This is the right choice for terminal events
+//     (finish, tool result, error, cancel) that must not be silently
+//     coalesced away.
+//
+// Drop counters ([Broker.DropCount], [Broker.MustDeliverDropCount]) are
+// exposed so callers can surface saturation in telemetry.
 package pubsub
 
 import (
 	"context"
+	"log/slog"
 	"sync"
+	"sync/atomic"
+	"time"
 )
 
-const bufferSize = 64
+const (
+	bufferSize = 64
+
+	// defaultMustDeliverTimeout is the per-subscriber upper bound on how
+	// long [Broker.PublishMustDeliver] will block waiting for buffer
+	// space before giving up on that subscriber.
+	defaultMustDeliverTimeout = 50 * time.Millisecond
+)
 
 type Broker[T any] struct {
-	subs              map[chan Event[T]]struct{}
-	mu                sync.RWMutex
-	done              chan struct{}
-	subCount          int
-	maxEvents         int
-	channelBufferSize int
+	subs                 map[chan Event[T]]struct{}
+	mu                   sync.RWMutex
+	done                 chan struct{}
+	subCount             int
+	maxEvents            int
+	channelBufferSize    int
+	mustDeliverTimeout   time.Duration
+	dropCount            atomic.Uint64
+	mustDeliverDropCount atomic.Uint64
 }
 
 func NewBroker[T any]() *Broker[T] {
@@ -22,13 +57,27 @@ func NewBroker[T any]() *Broker[T] {
 
 func NewBrokerWithOptions[T any](channelBufferSize, maxEvents int) *Broker[T] {
 	return &Broker[T]{
-		subs:              make(map[chan Event[T]]struct{}),
-		done:              make(chan struct{}),
-		maxEvents:         maxEvents,
-		channelBufferSize: channelBufferSize,
+		subs:               make(map[chan Event[T]]struct{}),
+		done:               make(chan struct{}),
+		maxEvents:          maxEvents,
+		channelBufferSize:  channelBufferSize,
+		mustDeliverTimeout: defaultMustDeliverTimeout,
 	}
 }
 
+// SetMustDeliverTimeout overrides the per-subscriber timeout used by
+// [Broker.PublishMustDeliver]. A zero or negative value resets to the
+// default. Intended primarily for tests.
+func (b *Broker[T]) SetMustDeliverTimeout(d time.Duration) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	if d <= 0 {
+		b.mustDeliverTimeout = defaultMustDeliverTimeout
+		return
+	}
+	b.mustDeliverTimeout = d
+}
+
 func (b *Broker[T]) Shutdown() {
 	select {
 	case <-b.done: // Already closed
@@ -90,6 +139,25 @@ func (b *Broker[T]) GetSubscriberCount() int {
 	return b.subCount
 }
 
+// DropCount returns the cumulative number of events dropped by
+// [Broker.Publish] because a subscriber's channel was full.
+func (b *Broker[T]) DropCount() uint64 {
+	return b.dropCount.Load()
+}
+
+// MustDeliverDropCount returns the cumulative number of events dropped
+// by [Broker.PublishMustDeliver] after the per-subscriber timeout
+// expired.
+func (b *Broker[T]) MustDeliverDropCount() uint64 {
+	return b.mustDeliverDropCount.Load()
+}
+
+// Publish delivers an event to every active subscriber.
+//
+// Delivery is non-blocking and lossy: if a subscriber's channel is full
+// the event is dropped for that subscriber and [Broker.DropCount] is
+// incremented. Use [Broker.PublishMustDeliver] for events that must not
+// be silently dropped.
 func (b *Broker[T]) Publish(t EventType, payload T) {
 	b.mu.RLock()
 	defer b.mu.RUnlock()
@@ -106,8 +174,57 @@ func (b *Broker[T]) Publish(t EventType, payload T) {
 		select {
 		case sub <- event:
 		default:
-			// Channel is full, subscriber is slow - skip this event
-			// This prevents blocking the publisher
+			// Channel is full, subscriber is slow - skip this event.
+			// Lossy by design; counted so saturation is observable.
+			b.dropCount.Add(1)
+		}
+	}
+}
+
+// PublishMustDeliver delivers an event with bounded-blocking semantics.
+// For each subscriber it first attempts a non-blocking send, then falls
+// back to a blocking send bounded by a per-subscriber timeout (default
+// [defaultMustDeliverTimeout]). On timeout the event is dropped for
+// that subscriber, [Broker.MustDeliverDropCount] is incremented, and an
+// error is logged. The publisher never blocks indefinitely.
+//
+// Use this for terminal events that must reach subscribers (finish,
+// tool result, error, cancel). Callers must still tolerate rare drops
+// after timeout — recovery is the subscriber's responsibility (e.g. a
+// re-fetch on the next session-visible event).
+func (b *Broker[T]) PublishMustDeliver(ctx context.Context, t EventType, payload T) {
+	b.mu.RLock()
+	defer b.mu.RUnlock()
+
+	select {
+	case <-b.done:
+		return
+	default:
+	}
+
+	event := Event[T]{Type: t, Payload: payload}
+	timeout := b.mustDeliverTimeout
+
+	for sub := range b.subs {
+		// Fast path: non-blocking send.
+		select {
+		case sub <- event:
+			continue
+		default:
+		}
+
+		// Slow path: bounded blocking send.
+		timer := time.NewTimer(timeout)
+		select {
+		case sub <- event:
+			timer.Stop()
+		case <-timer.C:
+			b.mustDeliverDropCount.Add(1)
+			slog.Error("PublishMustDeliver timed out delivering event",
+				"type", t, "timeout", timeout)
+		case <-ctx.Done():
+			timer.Stop()
+			return
 		}
 	}
 }
diff --git a/internal/workspace/app_workspace.go b/internal/workspace/app_workspace.go
index 57b1228e7eacb28a16141283ee2703a33511bd18..d4e5ed790e3a1cf7a4bcc299e4e4e63bedfbacd5 100644
--- a/internal/workspace/app_workspace.go
+++ b/internal/workspace/app_workspace.go
@@ -70,6 +70,12 @@ func (w *AppWorkspace) ParseAgentToolSessionID(sessionID string) (string, string
 // -- Messages --
 
 func (w *AppWorkspace) ListMessages(ctx context.Context, sessionID string) ([]message.Message, error) {
+	// Drain any debounced updates so the caller observes the latest
+	// in-memory state. message.Service buffers streaming deltas and a
+	// cold List would otherwise miss them at session-switch time.
+	if err := w.app.Messages.FlushAll(ctx); err != nil {
+		return nil, err
+	}
 	return w.app.Messages.List(ctx, sessionID)
 }
 

From 8370edb6910e3044f0e7e375685431110a3eac9c Mon Sep 17 00:00:00 2001
From: Christian Rocha <christian@rocha.is>
Date: Tue, 12 May 2026 11:44:13 -0400
Subject: [PATCH 03/10] perf(chat): cache the parts of an assistant message
 separately
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each assistant message has up to three sections — thinking, response, and
errors — and previously a new streamed token re-rendered all of them. Now
each section is cached on its own, so streaming a response no longer
re-renders the (often long) thinking block above it.

Co-Authored-By: Charm Crush <crush@charm.land>
---
 internal/ui/chat/assistant.go                 | 296 +++++++++--
 .../ui/chat/assistant_section_cache_test.go   | 464 ++++++++++++++++++
 2 files changed, 728 insertions(+), 32 deletions(-)
 create mode 100644 internal/ui/chat/assistant_section_cache_test.go

diff --git a/internal/ui/chat/assistant.go b/internal/ui/chat/assistant.go
index 2977e7000c2945250bb327c1c79dc152b8c51f92..ee7ec44fe606e72ea61f5a79d04f1d73264367f5 100644
--- a/internal/ui/chat/assistant.go
+++ b/internal/ui/chat/assistant.go
@@ -1,7 +1,9 @@
 package chat
 
 import (
+	"encoding/binary"
 	"fmt"
+	"hash/fnv"
 	"strings"
 
 	tea "charm.land/bubbletea/v2"
@@ -20,6 +22,70 @@ const assistantMessageTruncateFormat = "… (%d lines hidden) [click or space to
 // maxCollapsedThinkingHeight defines the maximum height of the thinking
 const maxCollapsedThinkingHeight = 10
 
+// assistantSection is a per-section render cache for AssistantMessageItem.
+// Each section (thinking, content, error) carries its own keys so that
+// streaming a section does not invalidate a different — often more
+// expensive — section's cached render. srcHash is an FNV-64 of the
+// section's source text; extra captures any other state that changes
+// the rendered output (e.g. thinkingExpanded, the thinking footer
+// inputs). valid disambiguates a real cache hit from the zero value
+// when both source text and extras hash to zero. aux carries any
+// per-section side data that the caller needs to recover on a hit
+// (e.g. the thinking box height for click detection).
+type assistantSection struct {
+	width   int
+	srcHash uint64
+	extra   uint64
+	out     string
+	h       int
+	aux     int
+	valid   bool
+}
+
+// hit reports whether the cache entry matches the requested key.
+func (s *assistantSection) hit(width int, srcHash, extra uint64) bool {
+	return s.valid && s.width == width && s.srcHash == srcHash && s.extra == extra
+}
+
+// store records the rendered output under the given key.
+func (s *assistantSection) store(width int, srcHash, extra uint64, out string, aux int) {
+	s.width = width
+	s.srcHash = srcHash
+	s.extra = extra
+	s.out = out
+	s.h = lipgloss.Height(out)
+	s.aux = aux
+	s.valid = true
+}
+
+// reset drops the cached output.
+func (s *assistantSection) reset() {
+	*s = assistantSection{}
+}
+
+// fnv64 hashes a single string with FNV-64.
+func fnv64(s string) uint64 {
+	h := fnv.New64a()
+	_, _ = h.Write([]byte(s))
+	return h.Sum64()
+}
+
+// fnvFields hashes a list of byte fields with length-prefix framing
+// so that no concatenation collision can occur between distinct
+// field tuples (a NUL inside one field cannot impersonate a
+// boundary between two fields). Each field is preceded by its
+// length encoded as 8 bytes little-endian.
+func fnvFields(fields ...[]byte) uint64 {
+	h := fnv.New64a()
+	var lenBuf [8]byte
+	for _, f := range fields {
+		binary.LittleEndian.PutUint64(lenBuf[:], uint64(len(f)))
+		_, _ = h.Write(lenBuf[:])
+		_, _ = h.Write(f)
+	}
+	return h.Sum64()
+}
+
 // AssistantMessageItem represents an assistant message in the chat UI.
 //
 // This item includes thinking, and the content but does not include the tool calls.
@@ -33,6 +99,13 @@ type AssistantMessageItem struct {
 	anim              *anim.Anim
 	thinkingExpanded  bool
 	thinkingBoxHeight int // Tracks the rendered thinking box height for click detection.
+
+	// Per-section render caches. Splitting these out means content
+	// streaming does not invalidate the (often expensive) thinking
+	// render, and vice versa.
+	thinkingSec assistantSection
+	contentSec  assistantSection
+	errorSec    assistantSection
 }
 
 var _ Expandable = (*AssistantMessageItem)(nil)
@@ -88,14 +161,7 @@ func (a *AssistantMessageItem) RawRender(width int) string {
 		spinner = a.renderSpinning()
 	}
 
-	content, height, ok := a.getCachedRender(cappedWidth)
-	if !ok {
-		content = a.renderMessageContent(cappedWidth)
-		height = lipgloss.Height(content)
-		// cache the rendered content
-		a.setCachedRender(content, cappedWidth, height)
-	}
-
+	content, height := a.renderMessageContent(cappedWidth)
 	highlightedContent := a.renderHighlighted(content, cappedWidth, height)
 	if spinner != "" {
 		if highlightedContent != "" {
@@ -116,15 +182,16 @@ func (a *AssistantMessageItem) Render(width int) string {
 	// RawRender, so we can just apply the styles directly to each line.
 	//
 	// The split + per-line prefix loop is O(L); cache the result keyed
-	// by (width, focused) so steady-state Render becomes a pointer
-	// return. Bypass the cache while spinning (RawRender's spinner
-	// suffix changes every animation frame) or while a highlight range
-	// is active (selection drag).
+	// by (width, focused, sectionsFingerprint) so steady-state Render
+	// becomes a pointer return. The sectionsFingerprint folds in the
+	// per-section srcHash/extra so that any sub-cache change
+	// invalidates this prefix cache without requiring an explicit
+	// drop. Bypass the cache while spinning (RawRender's spinner
+	// suffix changes every animation frame) or while a highlight
+	// range is active (selection drag).
 	useCache := !a.isSpinning() && !a.isHighlighted()
-	var key uint64
-	if a.focused {
-		key = 1
-	}
+	cappedWidth := cappedMessageWidth(width)
+	key := a.prefixCacheKey(cappedWidth)
 	if useCache {
 		if cached, ok := a.getCachedPrefixedRender(width, key); ok {
 			return cached
@@ -148,36 +215,182 @@ func (a *AssistantMessageItem) Render(width int) string {
 	return out
 }
 
-// renderMessageContent renders the message content including thinking, main content, and finish reason.
-func (a *AssistantMessageItem) renderMessageContent(width int) string {
+// prefixCacheKey builds the F3 prefixed-render cache key. We pack the
+// focus bit into bit 0 and a fingerprint of the section caches into
+// the upper bits, so any change to a sub-section's source text or
+// extras forces the prefix cache to miss without needing an explicit
+// drop. cappedWidth is included so a cached prefix never survives a
+// section-cache miss caused by a width change. The finish reason is
+// folded in too because it controls the composition of
+// renderMessageContent (e.g. appending the constant "Canceled"
+// string) — that decision lives outside any section's own hash.
+func (a *AssistantMessageItem) prefixCacheKey(cappedWidth int) uint64 {
+	thinkSrc, thinkExtra := a.thinkingKey()
+	contentSrc, contentExtra := a.contentKey()
+	errSrc, errExtra := a.errorKey()
+	h := fnv.New64a()
+	var buf [8]byte
+	writeU64 := func(v uint64) {
+		for i := range 8 {
+			buf[i] = byte(v >> (8 * i))
+		}
+		_, _ = h.Write(buf[:])
+	}
+	writeU64(uint64(cappedWidth))
+	writeU64(thinkSrc)
+	writeU64(thinkExtra)
+	writeU64(contentSrc)
+	writeU64(contentExtra)
+	writeU64(errSrc)
+	writeU64(errExtra)
+	writeU64(a.compositionKey())
+	fingerprint := h.Sum64()
+	var focusBit uint64
+	if a.focused {
+		focusBit = 1
+	}
+	return (fingerprint &^ 1) | focusBit
+}
+
+// compositionKey hashes the inputs to renderMessageContent's structural
+// decisions (which sections to include, whether to append the
+// constant "Canceled" footer) so that flipping IsFinished or the
+// finish reason invalidates the prefix cache even when no section's
+// own source text changed.
+func (a *AssistantMessageItem) compositionKey() uint64 {
+	var finishedFlag byte
+	var reason string
+	if a.message.IsFinished() {
+		finishedFlag = 1
+		reason = string(a.message.FinishReason())
+	}
+	// Length-prefixed framing keeps the finished flag and the reason
+	// string from blending into one another.
+	return fnvFields([]byte{finishedFlag}, []byte(reason))
+}
+
+// renderMessageContent renders the message content including thinking, main
+// content, and finish reason. Each section is served from its own cache;
+// only the section whose source text or extras changed since the last
+// render is recomputed.
+func (a *AssistantMessageItem) renderMessageContent(width int) (string, int) {
 	var messageParts []string
 	thinking := strings.TrimSpace(a.message.ReasoningContent().Thinking)
 	content := strings.TrimSpace(a.message.Content().Text)
-	// if the massage has reasoning content add that first
+
 	if thinking != "" {
-		messageParts = append(messageParts, a.renderThinking(a.message.ReasoningContent().Thinking, width))
+		messageParts = append(messageParts, a.cachedThinking(width))
 	}
 
-	// then add the main content
 	if content != "" {
-		// add a spacer between thinking and content
 		if thinking != "" {
 			messageParts = append(messageParts, "")
 		}
-		messageParts = append(messageParts, a.renderMarkdown(content, width))
+		messageParts = append(messageParts, a.cachedContent(width))
 	}
 
-	// finally add any finish reason info
 	if a.message.IsFinished() {
 		switch a.message.FinishReason() {
 		case message.FinishReasonCanceled:
 			messageParts = append(messageParts, a.sty.Messages.AssistantCanceled.Render("Canceled"))
 		case message.FinishReasonError:
-			messageParts = append(messageParts, a.renderError(width))
+			messageParts = append(messageParts, a.cachedError(width))
 		}
 	}
 
-	return strings.Join(messageParts, "\n")
+	out := strings.Join(messageParts, "\n")
+	return out, lipgloss.Height(out)
+}
+
+// thinkingKey returns the (srcHash, extra) cache key components for the
+// thinking section. extra folds in everything other than the raw
+// thinking text that affects the rendered output: the expanded flag
+// and the footer state (which depends on IsThinking, ToolCalls, and
+// ThinkingDuration).
+func (a *AssistantMessageItem) thinkingKey() (uint64, uint64) {
+	thinking := a.message.ReasoningContent().Thinking
+	srcHash := fnv64(thinking)
+
+	showFooter := !a.message.IsThinking() || len(a.message.ToolCalls()) > 0
+	var durationStr string
+	if showFooter {
+		duration := a.message.ThinkingDuration()
+		if duration.String() != "0s" {
+			durationStr = duration.String()
+		}
+	}
+	var expanded byte
+	if a.thinkingExpanded {
+		expanded = 1
+	}
+	var footer byte
+	if showFooter {
+		footer = 1
+	}
+	// Length-prefixed framing avoids any delimiter collision between
+	// the flag bytes and the duration string.
+	extra := fnvFields([]byte{expanded, footer}, []byte(durationStr))
+	return srcHash, extra
+}
+
+// contentKey returns the (srcHash, extra) cache key components for the
+// main content section.
+func (a *AssistantMessageItem) contentKey() (uint64, uint64) {
+	return fnv64(a.message.Content().Text), 0
+}
+
+// errorKey returns the (srcHash, extra) cache key components for the
+// error section. Returns (0, 0) when no error is present so the cache
+// stays a no-op for non-error messages.
+func (a *AssistantMessageItem) errorKey() (uint64, uint64) {
+	if !a.message.IsFinished() || a.message.FinishReason() != message.FinishReasonError {
+		return 0, 0
+	}
+	finishPart := a.message.FinishPart()
+	if finishPart == nil {
+		return 0, 0
+	}
+	// Length-prefixed framing prevents Message+Details collisions
+	// between distinct (Message, Details) tuples that would
+	// otherwise concatenate to the same byte sequence.
+	return fnvFields([]byte(finishPart.Message), []byte(finishPart.Details)), 0
+}
+
+// cachedThinking returns the rendered thinking section, computing and
+// caching it on miss. The thinking-box height (used for click target
+// detection) is preserved across hits via assistantSection.aux so the
+// cached path never desyncs click detection.
+func (a *AssistantMessageItem) cachedThinking(width int) string {
+	srcHash, extra := a.thinkingKey()
+	if a.thinkingSec.hit(width, srcHash, extra) {
+		a.thinkingBoxHeight = a.thinkingSec.aux
+		return a.thinkingSec.out
+	}
+	out := a.renderThinking(a.message.ReasoningContent().Thinking, width)
+	a.thinkingSec.store(width, srcHash, extra, out, a.thinkingBoxHeight)
+	return out
+}
+
+// cachedContent returns the rendered content section.
+func (a *AssistantMessageItem) cachedContent(width int) string {
+	srcHash, extra := a.contentKey()
+	if a.contentSec.hit(width, srcHash, extra) {
+		return a.contentSec.out
+	}
+	out := a.renderMarkdown(a.message.Content().Text, width)
+	a.contentSec.store(width, srcHash, extra, out, 0)
+	return out
+}
+
+// cachedError returns the rendered error section.
+func (a *AssistantMessageItem) cachedError(width int) string {
+	srcHash, extra := a.errorKey()
+	if a.errorSec.hit(width, srcHash, extra) {
+		return a.errorSec.out
+	}
+	out := a.renderError(width)
+	a.errorSec.store(width, srcHash, extra, out, 0)
+	return out
 }
 
 // renderThinking renders the thinking/reasoning content with footer.
@@ -260,22 +473,41 @@ func (a *AssistantMessageItem) isSpinning() bool {
 	return (isThinking || !isFinished) && !hasContent && !hasToolCalls
 }
 
-// SetMessage is used to update the underlying message.
-func (a *AssistantMessageItem) SetMessage(message *message.Message) tea.Cmd {
+// SetMessage is used to update the underlying message. Only the
+// sub-section caches whose source text or extras changed are
+// invalidated; the others survive and serve cache hits on the next
+// RawRender.
+func (a *AssistantMessageItem) SetMessage(msg *message.Message) tea.Cmd {
 	wasSpinning := a.isSpinning()
-	a.message = message
-	a.clearCache()
+	a.message = msg
+	// The prefix cache is keyed by a fingerprint that includes every
+	// section's source hash, so an unchanged section keeps its prefix
+	// cache valid while a changed section forces a miss naturally.
+	// Section caches themselves are content-keyed, so they do not
+	// need an explicit drop here either.
 	if !wasSpinning && a.isSpinning() {
 		return a.StartAnimation()
 	}
 	return nil
 }
 
+// clearCache drops every cached render for this item, including the
+// per-section caches. Shadows the embedded cachedMessageItem.clearCache
+// so ClearItemCaches (style change) wipes the section caches too.
+func (a *AssistantMessageItem) clearCache() {
+	a.cachedMessageItem.clearCache()
+	a.thinkingSec.reset()
+	a.contentSec.reset()
+	a.errorSec.reset()
+}
+
 // ToggleExpanded toggles the expanded state of the thinking box and returns
-// whether the item is now expanded.
+// whether the item is now expanded. Both the thinking section cache and
+// the F3 prefix cache key fold in thinkingExpanded (via the section's
+// extra hash and the prefix cache fingerprint respectively), so no
+// explicit invalidation is required.
 func (a *AssistantMessageItem) ToggleExpanded() bool {
 	a.thinkingExpanded = !a.thinkingExpanded
-	a.clearCache()
 	return a.thinkingExpanded
 }
 
diff --git a/internal/ui/chat/assistant_section_cache_test.go b/internal/ui/chat/assistant_section_cache_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..85a02f909b80d41052a6d805327aef5b6abffb27
--- /dev/null
+++ b/internal/ui/chat/assistant_section_cache_test.go
@@ -0,0 +1,464 @@
+package chat
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// Fixed Unix timestamps for deterministic cache-equality tests. The
+// thinking section's `extra` hash folds in ThinkingDuration, which
+// in turn depends on (FinishedAt - StartedAt). Anchoring both
+// timestamps removes any wall-clock dependency from the cache key
+// so two builds across a second boundary still hit the cache.
+const (
+	testStartedAt  int64 = 1_700_000_000
+	testFinishedAt int64 = 1_700_000_005
+	testFinishTime int64 = 1_700_000_006
+)
+
+// thinkingMessage builds an assistant message with a fixed reasoning
+// content and an optional text content. When text is empty the
+// message represents a still-thinking turn (matches IsThinking()).
+// Both reasoning timestamps are anchored to fixed Unix seconds so
+// ThinkingDuration is deterministic and cache-equality assertions
+// don't depend on wall-clock time.
+func thinkingMessage(id, thinking, text string) *message.Message {
+	parts := []message.ContentPart{
+		message.ReasoningContent{
+			Thinking:   thinking,
+			StartedAt:  testStartedAt,
+			FinishedAt: testFinishedAt,
+		},
+	}
+	if text != "" {
+		parts = append(parts, message.TextContent{Text: text})
+	}
+	return &message.Message{
+		ID:    id,
+		Role:  message.Assistant,
+		Parts: parts,
+	}
+}
+
+// errorMessage builds a finished assistant message whose finish part
+// carries an error reason plus a custom message and details.
+func errorMessage(id, errMsg, errDetails string) *message.Message {
+	return &message.Message{
+		ID:   id,
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: "partial output"},
+			message.Finish{
+				Reason:  message.FinishReasonError,
+				Message: errMsg,
+				Details: errDetails,
+				Time:    testFinishTime,
+			},
+		},
+	}
+}
+
+// renderTwoSetMessages drives a SetMessage cycle and returns the
+// section-cache identity (out string pointers via direct comparison
+// of the cached fields). The test compares `out` strings; identical
+// output across cycles is the cache-hit indicator we rely on.
+type sectionSnapshot struct {
+	thinking string
+	content  string
+	errSec   string
+}
+
+func snapshot(a *AssistantMessageItem) sectionSnapshot {
+	return sectionSnapshot{
+		thinking: a.thinkingSec.out,
+		content:  a.contentSec.out,
+		errSec:   a.errorSec.out,
+	}
+}
+
+// TestAssistantSectionCache_ContentChangeDoesNotInvalidateThinking covers
+// the central F4 invariant: streaming the main content through SetMessage
+// must keep the cached thinking render intact, provided the inputs to
+// the thinking section render (text, expanded flag, footer state) are
+// unchanged. We seed an already-non-empty content so that IsThinking()
+// is false on both renders — that's the steady streaming state where
+// the thinking block has finished and content keeps growing.
+func TestAssistantSectionCache_ContentChangeDoesNotInvalidateThinking(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	thinking := "Step 1\nStep 2\nStep 3"
+	msg := thinkingMessage("a1", thinking, "Initial answer.")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 71
+
+	_ = item.RawRender(width)
+	first := snapshot(item)
+	require.NotEmpty(t, first.thinking, "thinking section must be populated after first render")
+
+	// Stream more content into the existing turn. Thinking text and
+	// footer state are byte-identical between the two renders.
+	updated := thinkingMessage("a1", thinking, "Initial answer. More streamed text.")
+	item.SetMessage(updated)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+
+	require.Equal(t, first.thinking, second.thinking,
+		"content streaming must not invalidate the thinking section render")
+	require.NotEqual(t, first.content, second.content,
+		"content section must have been re-rendered")
+}
+
+// TestAssistantSectionCache_ThinkingChangeDoesNotInvalidateContent is the
+// mirror of the previous test: extending thinking text must not force a
+// re-render of the content section.
+func TestAssistantSectionCache_ThinkingChangeDoesNotInvalidateContent(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	content := "Final answer goes here."
+	msg := thinkingMessage("a2", "Step 1", content)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 73
+
+	_ = item.RawRender(width)
+	first := snapshot(item)
+	require.NotEmpty(t, first.content)
+
+	updated := thinkingMessage("a2", "Step 1\nStep 2", content)
+	item.SetMessage(updated)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+
+	require.Equal(t, first.content, second.content,
+		"thinking streaming must not invalidate the content section render")
+	require.NotEqual(t, first.thinking, second.thinking,
+		"thinking text changed; thinking section must have re-rendered")
+}
+
+// TestAssistantSectionCache_HashKeyDiscrimination asserts that two
+// messages with different source text hash to different per-section
+// keys, and that messages with identical source text hit the cache.
+func TestAssistantSectionCache_HashKeyDiscrimination(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	msgA := thinkingMessage("a3", "thinking A", "content A")
+	msgB := thinkingMessage("a3", "thinking B", "content B")
+
+	itemA := NewAssistantMessageItem(&sty, msgA).(*AssistantMessageItem)
+	itemB := NewAssistantMessageItem(&sty, msgB).(*AssistantMessageItem)
+
+	thinkSrcA, _ := itemA.thinkingKey()
+	thinkSrcB, _ := itemB.thinkingKey()
+	require.NotEqual(t, thinkSrcA, thinkSrcB,
+		"distinct thinking text must produce distinct FNV-64 source hashes")
+
+	contentSrcA, _ := itemA.contentKey()
+	contentSrcB, _ := itemB.contentKey()
+	require.NotEqual(t, contentSrcA, contentSrcB,
+		"distinct content text must produce distinct FNV-64 source hashes")
+
+	// Identical source text on a fresh item must produce the same
+	// hashes — keying invariant for cache hits.
+	itemAClone := NewAssistantMessageItem(&sty, thinkingMessage("a3", "thinking A", "content A")).(*AssistantMessageItem)
+	thinkSrcAClone, _ := itemAClone.thinkingKey()
+	contentSrcAClone, _ := itemAClone.contentKey()
+	require.Equal(t, thinkSrcA, thinkSrcAClone)
+	require.Equal(t, contentSrcA, contentSrcAClone)
+}
+
+// TestAssistantSectionCache_CloneRoundTrip guards the contract that
+// message.Clone() does not invalidate any section cache: re-keying off
+// the cloned message must produce identical hashes and the section
+// caches must serve byte-identical renders.
+func TestAssistantSectionCache_CloneRoundTrip(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	orig := thinkingMessage("a4", "Reasoning step.", "Answer text.")
+	item := NewAssistantMessageItem(&sty, orig).(*AssistantMessageItem)
+
+	const width = 75
+	_ = item.RawRender(width)
+	first := snapshot(item)
+
+	cloned := orig.Clone()
+	item.SetMessage(&cloned)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+
+	require.Equal(t, first.thinking, second.thinking, "clone must hit the thinking cache")
+	require.Equal(t, first.content, second.content, "clone must hit the content cache")
+}
+
+// TestAssistantSectionCache_ResizeInvalidatesAll asserts that a width
+// change forces a re-render of every section.
+func TestAssistantSectionCache_ResizeInvalidatesAll(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	msg := errorMessage("a5", "boom", strings.Repeat("detail line\n", 5))
+	// errorMessage returns FinishReasonError; combine with thinking
+	// content so all three sections are exercised.
+	msg.Parts = append([]message.ContentPart{
+		message.ReasoningContent{
+			Thinking:   "Considering options.",
+			StartedAt:  testStartedAt,
+			FinishedAt: testFinishedAt,
+		},
+	}, msg.Parts...)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	_ = item.RawRender(77)
+	first := snapshot(item)
+	require.NotEmpty(t, first.thinking)
+	require.NotEmpty(t, first.content)
+	require.NotEmpty(t, first.errSec)
+
+	_ = item.RawRender(117)
+	second := snapshot(item)
+
+	require.NotEqual(t, first.thinking, second.thinking, "resize must re-render the thinking section")
+	require.NotEqual(t, first.content, second.content, "resize must re-render the content section")
+	require.NotEqual(t, first.errSec, second.errSec, "resize must re-render the error section")
+}
+
+// TestAssistantSectionCache_ErrorIndependentOfThinkingAndContent guards
+// that the error section caches independently. Editing the error
+// message must not invalidate the other two sections, and editing the
+// content must not invalidate the error section.
+func TestAssistantSectionCache_ErrorIndependentOfThinkingAndContent(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	build := func(thinking, content, errMsg, errDetails string) *message.Message {
+		return &message.Message{
+			ID:   "a6",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.ReasoningContent{
+					Thinking:   thinking,
+					StartedAt:  testStartedAt,
+					FinishedAt: testFinishedAt,
+				},
+				message.TextContent{Text: content},
+				message.Finish{
+					Reason:  message.FinishReasonError,
+					Message: errMsg,
+					Details: errDetails,
+					Time:    testFinishTime,
+				},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build("think", "content", "boom", "details")).(*AssistantMessageItem)
+	_ = item.RawRender(79)
+	first := snapshot(item)
+
+	// Change only the error text. Thinking and content caches must
+	// survive; error cache must miss and re-render.
+	item.SetMessage(build("think", "content", "different boom", "different details"))
+	_ = item.RawRender(79)
+	second := snapshot(item)
+
+	require.Equal(t, first.thinking, second.thinking, "error change must not invalidate thinking")
+	require.Equal(t, first.content, second.content, "error change must not invalidate content")
+	require.NotEqual(t, first.errSec, second.errSec, "error change must re-render the error section")
+
+	// Now change only the content; error cache must survive.
+	item.SetMessage(build("think", "different content", "different boom", "different details"))
+	_ = item.RawRender(79)
+	third := snapshot(item)
+
+	require.Equal(t, second.thinking, third.thinking)
+	require.NotEqual(t, second.content, third.content)
+	require.Equal(t, second.errSec, third.errSec, "content change must not invalidate the error section")
+}
+
+// TestAssistantSectionCache_PrefixCacheRespectsSectionChanges guards
+// the F3/F4 boundary: the prefix cache must invalidate when any
+// underlying section changes. We verify by comparing the F3-cached
+// Render output across SetMessage cycles.
+func TestAssistantSectionCache_PrefixCacheRespectsSectionChanges(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	build := func(content string) *message.Message {
+		return &message.Message{
+			ID:   "a7",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.TextContent{Text: content},
+				message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build("first content")).(*AssistantMessageItem)
+	item.SetFocused(true)
+
+	const width = 81
+	first := item.Render(width)
+
+	item.SetMessage(build("second content"))
+	second := item.Render(width)
+	require.NotEqual(t, first, second,
+		"prefix cache must invalidate when the content section changes")
+
+	// Re-set to the original content; the prefix cache should
+	// produce identical output again.
+	item.SetMessage(build("first content"))
+	third := item.Render(width)
+	require.Equal(t, first, third)
+}
+
+// TestAssistantSectionCache_ByteIdenticalToFreshRender asserts that the
+// F4 cached path produces the same bytes as a fresh-instance render of
+// the equivalent message — i.e. caching is invisible from the outside.
+// Drives a sequence of mutations (thinking change, content change,
+// finish) and compares every step against an independent item rendered
+// from scratch.
+func TestAssistantSectionCache_ByteIdenticalToFreshRender(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	const width = 83
+
+	type step struct {
+		name string
+		msg  *message.Message
+	}
+	startedAt := testStartedAt
+	finishedAt := testFinishedAt
+	finishTime := testFinishTime
+	steps := []step{
+		{
+			name: "thinking-only",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning", StartedAt: startedAt},
+				},
+			},
+		},
+		{
+			name: "thinking-grew",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt},
+				},
+			},
+		},
+		{
+			name: "content-arrived",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt, FinishedAt: finishedAt},
+					message.TextContent{Text: "the answer"},
+				},
+			},
+		},
+		{
+			name: "finished-end-turn",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt, FinishedAt: finishedAt},
+					message.TextContent{Text: "the answer"},
+					message.Finish{Reason: message.FinishReasonEndTurn, Time: finishTime},
+				},
+			},
+		},
+	}
+
+	first := steps[0].msg.Clone()
+	cached := NewAssistantMessageItem(&sty, &first).(*AssistantMessageItem)
+	for _, s := range steps {
+		cached.SetMessage(s.msg)
+		freshMsg := s.msg.Clone()
+		fresh := NewAssistantMessageItem(&sty, &freshMsg).(*AssistantMessageItem)
+		require.Equal(t, fresh.RawRender(width), cached.RawRender(width),
+			"step %q: cached path must match fresh render byte-for-byte", s.name)
+	}
+}
+
+// TestAssistantSectionCache_PrefixCacheInvalidatesOnCompositionOnlyChange
+// guards the F3 prefix cache against composition-only changes:
+// flipping the finish reason from EndTurn to Canceled appends a
+// constant "Canceled" line via renderMessageContent, but no
+// section's own source text changes. The prefix cache must observe
+// the difference (compositionKey is folded into prefixCacheKey) and
+// the resulting bytes must differ. As a second guarantee, a fresh
+// item built with the same final state must produce byte-equal
+// output to the cached item — caching must never produce stale or
+// divergent renders.
+func TestAssistantSectionCache_PrefixCacheInvalidatesOnCompositionOnlyChange(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	const width = 87
+
+	build := func(reason message.FinishReason) *message.Message {
+		return &message.Message{
+			ID:   "comp",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.TextContent{Text: "hi"},
+				message.Finish{Reason: reason, Time: testFinishTime},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build(message.FinishReasonEndTurn)).(*AssistantMessageItem)
+	endTurnOut := item.Render(width)
+
+	// Flip only the finish reason. Thinking is empty and content
+	// text is unchanged, so no section's source hash moves; only
+	// compositionKey shifts. The prefix cache must miss.
+	item.SetMessage(build(message.FinishReasonCanceled))
+	canceledOut := item.Render(width)
+	require.NotEqual(t, endTurnOut, canceledOut,
+		"prefix cache must invalidate on composition-only change (finish reason)")
+
+	// A fresh item built with the same final state must match the
+	// cached item byte-for-byte — caching is invisible from the
+	// outside and never serves stale output.
+	fresh := NewAssistantMessageItem(&sty, build(message.FinishReasonCanceled)).(*AssistantMessageItem)
+	require.Equal(t, fresh.Render(width), canceledOut,
+		"cached output must equal a fresh render of the same final state")
+}
+
+// TestAssistantSectionCache_ThinkingBoxHeightSurvivesCacheHit guards
+// click-detection geometry across thinking-section cache hits. The
+// thinking box height feeds HandleMouseClick; it is recomputed
+// inside renderThinking and must be restored from
+// assistantSection.aux when the thinking cache hits. We render once
+// to capture the original height, trigger a content-only change so
+// thinkingKey stays identical (thinking text, expanded flag, and
+// footer state all unchanged), render again, and assert the
+// thinkingBoxHeight field is preserved.
+func TestAssistantSectionCache_ThinkingBoxHeightSurvivesCacheHit(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	const width = 71
+
+	thinking := strings.Join([]string{
+		"Considering the request.",
+		"Looking at the relevant files.",
+		"Drafting a plan.",
+		"Verifying constraints.",
+	}, "\n")
+	msg := thinkingMessage("hbox", thinking, "initial answer")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+	item.thinkingExpanded = true
+
+	_ = item.RawRender(width)
+	originalHeight := item.thinkingBoxHeight
+	require.Greater(t, originalHeight, 0,
+		"thinking box height must be populated after first render")
+
+	// Stomp the field so a stale read (cache hit that fails to
+	// restore aux) is detectable. Then trigger a content-only
+	// change: thinkingKey is byte-identical between renders, so
+	// the thinking section cache must hit and restore the
+	// preserved height via assistantSection.aux.
+	item.thinkingBoxHeight = -1
+	updated := thinkingMessage("hbox", thinking, "initial answer with more streamed text")
+	item.SetMessage(updated)
+	_ = item.RawRender(width)
+
+	require.Equal(t, originalHeight, item.thinkingBoxHeight,
+		"thinkingBoxHeight must be preserved across thinking section cache hits "+
+			"so HandleMouseClick keeps targeting the right rows")
+}

From 43b986c8c00a81b83fcf0422d5334ad61ab046c8 Mon Sep 17 00:00:00 2001
From: Christian Rocha <christian@rocha.is>
Date: Tue, 12 May 2026 12:22:44 -0400
Subject: [PATCH 04/10] perf(chat): show only the tail of long reasoning blocks
 when expanded

When a reasoning block is expanded, show only the most recent few hundred
lines by default with a hint that more lines are hidden, instead of
rendering the entire thing. Clicking again shows the full block.
This keeps the UI responsive when models produce very long reasoning.

Co-Authored-By: Charm Crush <crush@charm.land>
---
 internal/ui/chat/assistant.go                 | 137 ++++-
 .../ui/chat/assistant_section_cache_test.go   |   2 +-
 internal/ui/chat/assistant_test.go            | 112 +++-
 .../ui/chat/assistant_thinking_window_test.go | 500 ++++++++++++++++++
 internal/ui/model/chat_expand_test.go         |   8 +-
 5 files changed, 720 insertions(+), 39 deletions(-)
 create mode 100644 internal/ui/chat/assistant_thinking_window_test.go

diff --git a/internal/ui/chat/assistant.go b/internal/ui/chat/assistant.go
index ee7ec44fe606e72ea61f5a79d04f1d73264367f5..ac2c0c88819ebe4334b76cdf9b6774ccf75adca7 100644
--- a/internal/ui/chat/assistant.go
+++ b/internal/ui/chat/assistant.go
@@ -16,12 +16,41 @@ import (
 )
 
 // assistantMessageTruncateFormat is the text shown when an assistant message is
-// truncated.
+// truncated in the collapsed state.
 const assistantMessageTruncateFormat = "… (%d lines hidden) [click or space to expand]"
 
+// assistantMessageTailWindowFormat is shown above a tail-windowed thinking
+// block to advertise that earlier lines exist and that the user can
+// promote the view to a full expansion. The promotion is wired through
+// the existing ToggleExpanded path (click / space) — F5 deliberately
+// does not add a new keybinding.
+const assistantMessageTailWindowFormat = "… %d earlier lines hidden [click or space for full view]"
+
 // maxCollapsedThinkingHeight defines the maximum height of the thinking
 const maxCollapsedThinkingHeight = 10
 
+// maxExpandedThinkingTailLines is the F5 tail-window cap. When the user
+// expands a thinking block whose post-glamour line count exceeds this
+// threshold, only the last N lines are shown with an affordance line
+// indicating how many earlier lines are hidden. Clicking / pressing
+// space again promotes the view to a full expansion. The slice is
+// taken AFTER glamour render (not before) so fenced code blocks,
+// lists, and tables are not torn at arbitrary boundaries.
+const maxExpandedThinkingTailLines = 200
+
+// thinkingViewMode is the F5 three-state view machine for the thinking
+// block. ToggleExpanded cycles
+// collapsed → tail-window → full-expanded → collapsed, skipping the
+// tail-window step when the rendered thinking fits within the cap so
+// short blocks still toggle in two clicks.
+type thinkingViewMode uint8
+
+const (
+	thinkingCollapsed thinkingViewMode = iota
+	thinkingTailWindow
+	thinkingFullExpanded
+)
+
 // assistantSection is a per-section render cache for AssistantMessageItem.
 // Each section (thinking, content, error) carries its own keys so that
 // streaming a section does not invalidate a different — often more
@@ -97,7 +126,7 @@ type AssistantMessageItem struct {
 	message           *message.Message
 	sty               *styles.Styles
 	anim              *anim.Anim
-	thinkingExpanded  bool
+	thinkingViewMode  thinkingViewMode
 	thinkingBoxHeight int // Tracks the rendered thinking box height for click detection.
 
 	// Per-section render caches. Splitting these out means content
@@ -304,9 +333,9 @@ func (a *AssistantMessageItem) renderMessageContent(width int) (string, int) {
 
 // thinkingKey returns the (srcHash, extra) cache key components for the
 // thinking section. extra folds in everything other than the raw
-// thinking text that affects the rendered output: the expanded flag
-// and the footer state (which depends on IsThinking, ToolCalls, and
-// ThinkingDuration).
+// thinking text that affects the rendered output: the view mode
+// (collapsed / tail-window / full) and the footer state (which
+// depends on IsThinking, ToolCalls, and ThinkingDuration).
 func (a *AssistantMessageItem) thinkingKey() (uint64, uint64) {
 	thinking := a.message.ReasoningContent().Thinking
 	srcHash := fnv64(thinking)
@@ -319,17 +348,15 @@ func (a *AssistantMessageItem) thinkingKey() (uint64, uint64) {
 			durationStr = duration.String()
 		}
 	}
-	var expanded byte
-	if a.thinkingExpanded {
-		expanded = 1
-	}
 	var footer byte
 	if showFooter {
 		footer = 1
 	}
 	// Length-prefixed framing avoids any delimiter collision between
-	// the flag bytes and the duration string.
-	extra := fnvFields([]byte{expanded, footer}, []byte(durationStr))
+	// the flag bytes and the duration string. The view mode is folded
+	// in so that toggling collapsed ↔ tail-window ↔ full invalidates
+	// only the thinking section, not content/error.
+	extra := fnvFields([]byte{byte(a.thinkingViewMode), footer}, []byte(durationStr))
 	return srcHash, extra
 }
 
@@ -394,6 +421,12 @@ func (a *AssistantMessageItem) cachedError(width int) string {
 }
 
 // renderThinking renders the thinking/reasoning content with footer.
+//
+// Slicing happens AFTER glamour rendering so fenced code blocks, list
+// continuations, and tables are not split mid-block — the same
+// boundary problem §4.4 of the design note flags. The bordered
+// ThinkingBox style is applied on top of the (already-windowed)
+// lines so the visual box matches what the user sees today.
 func (a *AssistantMessageItem) renderThinking(thinking string, width int) string {
 	renderer := common.QuietMarkdownRenderer(a.sty, width)
 	rendered, err := renderer.Render(thinking)
@@ -405,13 +438,23 @@ func (a *AssistantMessageItem) renderThinking(thinking string, width int) string
 	lines := strings.Split(rendered, "\n")
 	totalLines := len(lines)
 
-	isTruncated := totalLines > maxCollapsedThinkingHeight
-	if !a.thinkingExpanded && isTruncated {
-		lines = lines[totalLines-maxCollapsedThinkingHeight:]
-		hint := a.sty.Messages.ThinkingTruncationHint.Render(
-			fmt.Sprintf(assistantMessageTruncateFormat, totalLines-maxCollapsedThinkingHeight),
-		)
-		lines = append([]string{hint, ""}, lines...)
+	switch a.thinkingViewMode {
+	case thinkingCollapsed:
+		if totalLines > maxCollapsedThinkingHeight {
+			lines = lines[totalLines-maxCollapsedThinkingHeight:]
+			hint := a.sty.Messages.ThinkingTruncationHint.Render(
+				fmt.Sprintf(assistantMessageTruncateFormat, totalLines-maxCollapsedThinkingHeight),
+			)
+			lines = append([]string{hint, ""}, lines...)
+		}
+	case thinkingTailWindow:
+		if totalLines > maxExpandedThinkingTailLines {
+			lines = lines[totalLines-maxExpandedThinkingTailLines:]
+			hint := a.sty.Messages.ThinkingTruncationHint.Render(
+				fmt.Sprintf(assistantMessageTailWindowFormat, totalLines-maxExpandedThinkingTailLines),
+			)
+			lines = append([]string{hint, ""}, lines...)
+		}
 	}
 
 	thinkingStyle := a.sty.Messages.ThinkingBox.Width(width)
@@ -501,14 +544,58 @@ func (a *AssistantMessageItem) clearCache() {
 	a.errorSec.reset()
 }
 
-// ToggleExpanded toggles the expanded state of the thinking box and returns
-// whether the item is now expanded. Both the thinking section cache and
-// the F3 prefix cache key fold in thinkingExpanded (via the section's
-// extra hash and the prefix cache fingerprint respectively), so no
-// explicit invalidation is required.
+// ToggleExpanded advances the F5 thinking view-mode cycle and returns
+// whether the item is now in any expanded state (tail-window or full).
+// The cycle is collapsed → tail-window → full → collapsed, with the
+// tail-window step skipped when the rendered thinking fits within
+// maxExpandedThinkingTailLines so short blocks remain a two-click
+// toggle. Both the thinking section cache and the F3 prefix cache
+// fold thinkingViewMode into their keys, so no explicit invalidation
+// is required here.
+//
+// When the message carries no thinking text the toggle is a no-op:
+// there is nothing to expand, and mutating the view mode would
+// thrash the thinking-section cache key for no visible benefit.
 func (a *AssistantMessageItem) ToggleExpanded() bool {
-	a.thinkingExpanded = !a.thinkingExpanded
-	return a.thinkingExpanded
+	if strings.TrimSpace(a.message.ReasoningContent().Thinking) == "" {
+		return a.thinkingViewMode != thinkingCollapsed
+	}
+	switch a.thinkingViewMode {
+	case thinkingCollapsed:
+		if a.tailWindowWouldTruncate() {
+			a.thinkingViewMode = thinkingTailWindow
+		} else {
+			a.thinkingViewMode = thinkingFullExpanded
+		}
+	case thinkingTailWindow:
+		a.thinkingViewMode = thinkingFullExpanded
+	case thinkingFullExpanded:
+		a.thinkingViewMode = thinkingCollapsed
+	}
+	return a.thinkingViewMode != thinkingCollapsed
+}
+
+// tailWindowWouldTruncate reports whether the current thinking text
+// is long enough that the tail-window step is worth inserting into
+// the toggle cycle. We use a cheap source-text logical-line count
+// as the heuristic rather than peeking into the cache: the cache
+// may be populated in collapsed state (where its height is bounded
+// by maxCollapsedThinkingHeight and tells us nothing about the
+// underlying length), and re-running glamour just to count lines
+// would defeat the cache. The heuristic can over-trigger (a source
+// with many short lines may wrap to fewer than N lines), in which
+// case the tail-window render is visually identical to full and
+// the cycle costs the user one extra toggle — preferred over the
+// alternative of failing to show the affordance on a genuinely
+// long block.
+//
+// Logical line count is `1 + newlineCount` (a string with no
+// newlines is one line). Comparing newline count alone introduced
+// an off-by-one that let a source whose post-newline-split length
+// equalled the cap skip the tail-window step.
+func (a *AssistantMessageItem) tailWindowWouldTruncate() bool {
+	lineCount := 1 + strings.Count(a.message.ReasoningContent().Thinking, "\n")
+	return lineCount > maxExpandedThinkingTailLines
 }
 
 // HandleMouseClick implements MouseClickable. It signals (via a true return)
diff --git a/internal/ui/chat/assistant_section_cache_test.go b/internal/ui/chat/assistant_section_cache_test.go
index 85a02f909b80d41052a6d805327aef5b6abffb27..f07f4146bdd06f249f52759f58e0b02649d20de6 100644
--- a/internal/ui/chat/assistant_section_cache_test.go
+++ b/internal/ui/chat/assistant_section_cache_test.go
@@ -441,7 +441,7 @@ func TestAssistantSectionCache_ThinkingBoxHeightSurvivesCacheHit(t *testing.T) {
 	}, "\n")
 	msg := thinkingMessage("hbox", thinking, "initial answer")
 	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
-	item.thinkingExpanded = true
+	item.thinkingViewMode = thinkingFullExpanded
 
 	_ = item.RawRender(width)
 	originalHeight := item.thinkingBoxHeight
diff --git a/internal/ui/chat/assistant_test.go b/internal/ui/chat/assistant_test.go
index 8ccbb403c30a7aff2448c082d012512ee06cdc18..5784eeff7a1daf5e2876ca7e520074b5642bf7e5 100644
--- a/internal/ui/chat/assistant_test.go
+++ b/internal/ui/chat/assistant_test.go
@@ -10,21 +10,108 @@ import (
 )
 
 // TestAssistantMessageItemExpandable guards the Expandable contract on
-// AssistantMessageItem. The earlier implementation returned no value, which
-// meant the type silently did not satisfy chat.Expandable and the
-// keyboard-driven expand path in model/chat.go skipped thinking blocks.
+// AssistantMessageItem along the keyboard-driven expand path. The earlier
+// implementation returned no value, which meant the type silently did
+// not satisfy chat.Expandable and the keyboard-driven expand path in
+// model/chat.go skipped thinking blocks.
+//
+// We exercise the contract through the bare Expandable interface (the
+// same dispatch site model.Chat.ToggleExpandedSelectedItem uses), which
+// proves both that AssistantMessageItem still satisfies the interface
+// and that the bool return reports the right semantic state at every
+// point in the cycle.
 func TestAssistantMessageItemExpandable(t *testing.T) {
 	t.Parallel()
 
 	sty := styles.CharmtonePantera()
-	msg := &message.Message{ID: "m1", Role: message.Assistant}
-	item := NewAssistantMessageItem(&sty, msg)
+	// Short thinking: under the tail-window cap, so the cycle is
+	// collapsed -> full -> collapsed (tail-window is skipped).
+	msg := thinkingMessage("m1", "step one\nstep two\nstep three", "")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
 
-	exp, ok := item.(Expandable)
+	exp, ok := any(item).(Expandable)
 	require.True(t, ok, "AssistantMessageItem must satisfy Expandable")
 
-	require.True(t, exp.ToggleExpanded(), "first toggle should report expanded")
-	require.False(t, exp.ToggleExpanded(), "second toggle should report collapsed")
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode,
+		"new items must start in the collapsed view-mode")
+	require.True(t, exp.ToggleExpanded(),
+		"first toggle of a non-empty thinking block must report expanded")
+	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode,
+		"short blocks must skip tail-window and land in full expansion")
+	require.False(t, exp.ToggleExpanded(),
+		"second toggle must report collapsed (cycle closed)")
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
+}
+
+// TestAssistantMessageItemExpandableEmptyThinkingNoOp guards the B2
+// fix: a message with no thinking text must treat ToggleExpanded as a
+// no-op. Mutating the view mode in that case would thrash the
+// thinking-section cache key for no visible benefit and would surprise
+// the caller (model.Chat.ToggleExpandedSelectedItem would treat a
+// "now collapsed" return as a real state change and re-scroll on it).
+func TestAssistantMessageItemExpandableEmptyThinkingNoOp(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := &message.Message{ID: "m1-empty", Role: message.Assistant}
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	exp, ok := any(item).(Expandable)
+	require.True(t, ok, "AssistantMessageItem must satisfy Expandable")
+
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
+	require.False(t, exp.ToggleExpanded(),
+		"empty thinking must report current (collapsed) state without flipping")
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode,
+		"empty-thinking toggle must not mutate thinkingViewMode")
+
+	// Whitespace-only thinking is still effectively empty.
+	item.message.Parts = []message.ContentPart{
+		message.ReasoningContent{Thinking: "  \n\n\t  ", StartedAt: testStartedAt},
+	}
+	require.False(t, exp.ToggleExpanded())
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
+}
+
+// TestAssistantMessageItemTailWindowBoundary guards the B1 fix: the
+// tail-window heuristic must compare logical line counts (1 +
+// newline count) against the cap, not raw newline counts. A source
+// whose logical line count exactly equals the cap must NOT trip the
+// tail-window step (full render still fits cleanly under the cap),
+// while one logical line over the cap must trip it.
+func TestAssistantMessageItemTailWindowBoundary(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+
+	atCap := buildLines(maxExpandedThinkingTailLines)
+	overCap := buildLines(maxExpandedThinkingTailLines + 1)
+
+	atItem := NewAssistantMessageItem(&sty, thinkingMessage("at-cap", atCap, "")).(*AssistantMessageItem)
+	require.False(t, atItem.tailWindowWouldTruncate(),
+		"a source with exactly N logical lines must not trip the tail-window step")
+
+	overItem := NewAssistantMessageItem(&sty, thinkingMessage("over-cap", overCap, "")).(*AssistantMessageItem)
+	require.True(t, overItem.tailWindowWouldTruncate(),
+		"a source with N+1 logical lines must trip the tail-window step")
+}
+
+// buildLines returns a string of n logical lines (n-1 newlines). Each
+// line is a unique short token so callers can distinguish head from
+// tail in rendered output if they need to.
+func buildLines(n int) string {
+	if n <= 0 {
+		return ""
+	}
+	var b []byte
+	for i := 1; i <= n; i++ {
+		if i > 1 {
+			b = append(b, '\n')
+		}
+		b = append(b, 'l', 'n')
+		b = append(b, []byte(itoa(i))...)
+	}
+	return string(b)
 }
 
 // TestAssistantMessageItemHandleMouseClick ensures HandleMouseClick does not
@@ -40,15 +127,16 @@ func TestAssistantMessageItemHandleMouseClick(t *testing.T) {
 	item.thinkingBoxHeight = 5
 
 	// Click inside the thinking box signals handled but must not mutate
-	// the expanded state.
+	// the view-mode state.
 	require.True(t, item.HandleMouseClick(ansi.MouseLeft, 0, 2))
-	require.False(t, item.thinkingExpanded, "HandleMouseClick must not toggle expansion on its own")
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode,
+		"HandleMouseClick must not toggle expansion on its own")
 
 	// Click outside the thinking box is ignored entirely.
 	require.False(t, item.HandleMouseClick(ansi.MouseLeft, 0, 10))
-	require.False(t, item.thinkingExpanded)
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
 
 	// Non-left button is ignored.
 	require.False(t, item.HandleMouseClick(ansi.MouseRight, 0, 2))
-	require.False(t, item.thinkingExpanded)
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
 }
diff --git a/internal/ui/chat/assistant_thinking_window_test.go b/internal/ui/chat/assistant_thinking_window_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..3b685847672eb6ff90f985bea5c9900856da6c93
--- /dev/null
+++ b/internal/ui/chat/assistant_thinking_window_test.go
@@ -0,0 +1,500 @@
+package chat
+
+import (
+	"strings"
+	"testing"
+
+	"charm.land/lipgloss/v2"
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/charmbracelet/x/ansi"
+	"github.com/stretchr/testify/require"
+)
+
+// thinkingMessageWithLines builds a still-thinking assistant message
+// whose reasoning content is `count` short paragraphs separated by
+// blank lines. The blank-line separation is what matters: glamour
+// renders paragraph blocks one-per-line in the output (with a
+// trailing blank line between paragraphs) instead of reflowing the
+// entire input into one big wrapped paragraph. That gives us a
+// post-glamour line count we can drive past the tail-window
+// threshold deterministically. Each paragraph is tagged with its
+// (1-based) index so the test can identify head vs tail in the
+// rendered output.
+//
+// The message has no text content and no Finish part, so
+// IsThinking() returns true and the render path skips the
+// "Thought for" footer — keeping the rendered height computation
+// simple.
+func thinkingMessageWithLines(id string, count int) *message.Message {
+	var b strings.Builder
+	for i := 1; i <= count; i++ {
+		b.WriteString("ln")
+		b.WriteString(itoa(i))
+		if i < count {
+			// Blank line between paragraphs: glamour preserves the
+			// per-paragraph structure rather than reflowing into one
+			// wrapped block, so totalLines tracks count predictably.
+			b.WriteString("\n\n")
+		}
+	}
+	return &message.Message{
+		ID:   id,
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.ReasoningContent{
+				Thinking:  b.String(),
+				StartedAt: testStartedAt,
+			},
+		},
+	}
+}
+
+// itoa is a local stdlib-free integer formatter used only by these
+// tests; pulling fmt in just for %d would be wasteful when the test
+// fixtures already churn 5000+ short strings.
+func itoa(n int) string {
+	if n == 0 {
+		return "0"
+	}
+	var buf [20]byte
+	i := len(buf)
+	for n > 0 {
+		i--
+		buf[i] = byte('0' + n%10)
+		n /= 10
+	}
+	return string(buf[i:])
+}
+
+// renderedThinkingHeight returns the line count of the cached
+// thinking section render only (not the full RawRender, which also
+// includes the content and error sections). Drives a render at
+// `width` first to populate the cache.
+func renderedThinkingHeight(t *testing.T, item *AssistantMessageItem, width int) int {
+	t.Helper()
+	_ = item.RawRender(width)
+	require.NotEmpty(t, item.thinkingSec.out,
+		"thinking section must be populated after RawRender")
+	return lipgloss.Height(item.thinkingSec.out)
+}
+
+// TestThinkingWindow_CollapsedCapPreserved guards that F5 did not
+// regress the existing collapsed-mode behaviour: a 5000-line
+// thinking block in the default (collapsed) state still renders at
+// most a small bounded height — the last `maxCollapsedThinkingHeight`
+// lines plus the truncation hint. The thinking message keeps
+// IsThinking() == true, so the optional "Thought for" footer is
+// suppressed and the section height equals the box height.
+func TestThinkingWindow_CollapsedCapPreserved(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := thinkingMessageWithLines("collapsed", 5000)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	// Default state must be collapsed.
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
+
+	// Unique odd width avoids sharing the glamour renderer cache with
+	// any other parallel test (the renderer instance is memoized per
+	// width and is not safe for concurrent Render calls).
+	const width = 91
+	height := renderedThinkingHeight(t, item, width)
+
+	// Collapsed mode keeps the existing cap: last 10 lines + a
+	// 2-line hint prefix (hint + blank). Allow a small slack for
+	// any future style-driven padding so the test is robust to
+	// cosmetic tweaks while still being orders of magnitude below
+	// the 5000-line source.
+	const collapsedUpperBound = maxCollapsedThinkingHeight + 5
+	require.LessOrEqual(t, height, collapsedUpperBound,
+		"collapsed mode must remain bounded by the small cap; got %d", height)
+}
+
+// TestThinkingWindow_ExpandedShortSkipsTailWindow guards that a
+// short thinking block (well under the tail-window cap) still
+// toggles directly to full expansion without an intermediate
+// tail-window step and shows no affordance footer. The cycle is
+// collapsed -> full -> collapsed for short blocks; tail-window is
+// only inserted when it would actually elide content.
+func TestThinkingWindow_ExpandedShortSkipsTailWindow(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const lines = 50
+	require.Less(t, lines, maxExpandedThinkingTailLines,
+		"this test relies on the source being well under the tail cap")
+	msg := thinkingMessageWithLines("short", lines)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	require.True(t, item.ToggleExpanded(),
+		"first toggle should report expanded")
+	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode,
+		"short blocks must skip tail-window and go straight to full expansion")
+
+	const width = 93
+	_ = item.RawRender(width)
+	out := item.thinkingSec.out
+	plain := ansi.Strip(out)
+
+	require.NotContains(t, plain, "earlier lines hidden",
+		"short blocks must not show the tail-window affordance")
+	require.NotContains(t, plain, "lines hidden",
+		"short expanded blocks must not show any truncation hint")
+	require.Contains(t, plain, "ln1 ",
+		"a fully expanded short block must include the very first source paragraph")
+	require.Contains(t, plain, "ln50 ",
+		"a fully expanded short block must include the last source paragraph")
+}
+
+// TestThinkingWindow_TailWindowed asserts the central F5 behaviour:
+// expanding a long thinking block produces a tail window of size
+// `maxExpandedThinkingTailLines` plus the affordance footer, with
+// the LAST source line present (i.e. we tailed, not headed) and
+// earlier lines elided.
+//
+// Beyond presence/absence of sentinels, this test verifies a true
+// `tail -K` relationship between the tail-windowed render and the
+// fully-expanded render of the same source at the same width: the
+// last K plain-ANSI lines of the windowed render must byte-equal
+// the last K lines of the unwindowed render.
+//
+// K is sized below the cap to absorb the affordance prefix (hint +
+// blank line) and any small framing differences introduced by the
+// bordered ThinkingBox. The cap minus 5 leaves a comfortable margin
+// for padding/footer rows while still asserting that the bulk of
+// the rendered tail is identical.
+func TestThinkingWindow_TailWindowed(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const total = 5000
+	const width = 95
+
+	// Tail-windowed render.
+	tailMsg := thinkingMessageWithLines("tail", total)
+	tailItem := NewAssistantMessageItem(&sty, tailMsg).(*AssistantMessageItem)
+	require.True(t, tailItem.ToggleExpanded(), "first toggle should report expanded")
+	require.Equal(t, thinkingTailWindow, tailItem.thinkingViewMode,
+		"a long block must enter tail-window after the first toggle")
+
+	height := renderedThinkingHeight(t, tailItem, width)
+
+	// The visible window is N tail lines plus an affordance line
+	// and a blank-line spacer (matching the existing collapsed-mode
+	// hint structure). Allow a small slack for style-driven
+	// padding.
+	const expectedFloor = maxExpandedThinkingTailLines + 1
+	const expectedCeil = maxExpandedThinkingTailLines + 5
+	require.GreaterOrEqual(t, height, expectedFloor,
+		"tail-window must include at least N + affordance lines; got %d", height)
+	require.LessOrEqual(t, height, expectedCeil,
+		"tail-window must not exceed N + a small padding budget; got %d", height)
+
+	tailPlain := ansi.Strip(tailItem.thinkingSec.out)
+
+	require.Contains(t, tailPlain, "earlier lines hidden",
+		"tail-windowed render must include the affordance footer")
+	require.Contains(t, tailPlain, "ln5000",
+		"tail-windowed render must include the LAST source paragraph — we tailed, not headed")
+	require.NotContains(t, tailPlain, "ln1 ",
+		"tail-windowed render must elide early source paragraphs")
+
+	// Independent reference render: same source, same width, full
+	// expansion (no tail slice). The tail-windowed output's last K
+	// lines must byte-equal the unwindowed output's last K lines.
+	fullMsg := thinkingMessageWithLines("tail-full-ref", total)
+	fullItem := NewAssistantMessageItem(&sty, fullMsg).(*AssistantMessageItem)
+	fullItem.thinkingViewMode = thinkingFullExpanded
+	_ = fullItem.RawRender(width)
+	fullPlain := ansi.Strip(fullItem.thinkingSec.out)
+
+	tailLines := strings.Split(tailPlain, "\n")
+	fullLines := strings.Split(fullPlain, "\n")
+
+	// K is the cap minus a small budget that covers the affordance
+	// prefix (hint line + blank line) and any framing differences
+	// the bordered ThinkingBox style may introduce around the
+	// edges. Documented inline because going much larger lets the
+	// affordance row leak into the comparison; going much smaller
+	// dilutes the assertion.
+	const K = maxExpandedThinkingTailLines - 5
+	require.GreaterOrEqual(t, len(tailLines), K,
+		"tail render must contain at least K lines; got %d", len(tailLines))
+	require.GreaterOrEqual(t, len(fullLines), K,
+		"full render must contain at least K lines; got %d", len(fullLines))
+
+	tailTail := tailLines[len(tailLines)-K:]
+	fullTail := fullLines[len(fullLines)-K:]
+	require.Equal(t, fullTail, tailTail,
+		"tail-windowed render's last %d lines must byte-equal the unwindowed render's last %d lines (true tail -K relationship)",
+		K, K)
+}
+
+// TestThinkingWindow_PromoteToFull verifies the cycle continues from
+// tail-window to full expansion: the second toggle drops the
+// affordance, removes the tail slice, and produces a render that
+// matches a fresh item rendered directly in the full-expanded
+// state.
+func TestThinkingWindow_PromoteToFull(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const total = 1500
+	msg := thinkingMessageWithLines("promote", total)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 97
+
+	require.True(t, item.ToggleExpanded())
+	require.Equal(t, thinkingTailWindow, item.thinkingViewMode)
+	_ = item.RawRender(width)
+	tailOut := item.thinkingSec.out
+	require.Contains(t, ansi.Strip(tailOut), "earlier lines hidden")
+
+	require.True(t, item.ToggleExpanded(), "second toggle stays expanded (full)")
+	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode)
+	_ = item.RawRender(width)
+	fullOut := item.thinkingSec.out
+	fullPlain := ansi.Strip(fullOut)
+
+	require.NotContains(t, fullPlain, "earlier lines hidden",
+		"full expansion must drop the tail-window affordance")
+	require.Contains(t, fullPlain, "ln1 ",
+		"full expansion must include the first source paragraph")
+	require.Contains(t, fullPlain, "ln1500 ",
+		"full expansion must include the last source paragraph")
+
+	// Independent reference: a fresh item, rendered straight into
+	// the full-expanded state, must produce byte-equal output.
+	freshMsg := thinkingMessageWithLines("promote-fresh", total)
+	fresh := NewAssistantMessageItem(&sty, freshMsg).(*AssistantMessageItem)
+	fresh.thinkingViewMode = thinkingFullExpanded
+	_ = fresh.RawRender(width)
+	require.Equal(t, fresh.thinkingSec.out, fullOut,
+		"cached full-expanded output must match a fresh full-expanded render")
+
+	// And the cycle closes back to collapsed.
+	require.False(t, item.ToggleExpanded(), "third toggle must report collapsed")
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
+}
+
+// sectionKey is the tuple that defines a cache-hit identity for an
+// assistantSection: (width, srcHash, extra). Comparing this tuple
+// across mutations is a stronger invariant than byte-equality of
+// rendered output: byte-equality could in principle hold even if
+// the cache invalidated and re-rendered identical bytes, while
+// tuple-equality proves the lookup key never moved.
+type sectionKey struct {
+	width   int
+	srcHash uint64
+	extra   uint64
+}
+
+func keyOf(s assistantSection) sectionKey {
+	return sectionKey{width: s.width, srcHash: s.srcHash, extra: s.extra}
+}
+
+// TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow
+// guards the F4/F5 boundary: streaming the main content while the
+// thinking block sits in tail-window mode must NOT invalidate the
+// thinking section cache. Tail-window state is folded into
+// thinkingKey()'s extra hash, so changing only the content text
+// keeps thinking's (srcHash, extra) tuple identical and the cache
+// hits.
+//
+// The assertion is on the cache key tuple, not just rendered bytes:
+// equal output could in principle survive a re-render with
+// identical inputs, but identical (width, srcHash, extra) tuples
+// across the SetMessage cycle prove the thinking cache was never
+// invalidated to begin with. The mirror tuple on the content
+// section MUST move (the source text changed), or the test isn't
+// exercising what it claims to.
+func TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const total = 1000
+
+	build := func(content string) *message.Message {
+		var b strings.Builder
+		for i := 1; i <= total; i++ {
+			b.WriteString("ln")
+			b.WriteString(itoa(i))
+			if i < total {
+				b.WriteString("\n\n")
+			}
+		}
+		parts := []message.ContentPart{
+			message.ReasoningContent{
+				Thinking:   b.String(),
+				StartedAt:  testStartedAt,
+				FinishedAt: testFinishedAt,
+			},
+		}
+		if content != "" {
+			parts = append(parts, message.TextContent{Text: content})
+		}
+		return &message.Message{ID: "tail-stream", Role: message.Assistant, Parts: parts}
+	}
+
+	item := NewAssistantMessageItem(&sty, build("first answer")).(*AssistantMessageItem)
+	item.thinkingViewMode = thinkingTailWindow
+
+	const width = 99
+	_ = item.RawRender(width)
+	first := snapshot(item)
+	firstThinkingKey := keyOf(item.thinkingSec)
+	firstContentKey := keyOf(item.contentSec)
+	require.NotEmpty(t, first.thinking)
+
+	item.SetMessage(build("first answer with more streaming text"))
+	_ = item.RawRender(width)
+	second := snapshot(item)
+	secondThinkingKey := keyOf(item.thinkingSec)
+	secondContentKey := keyOf(item.contentSec)
+
+	require.Equal(t, firstThinkingKey, secondThinkingKey,
+		"thinking section's (width, srcHash, extra) tuple must not move "+
+			"across a content-only update — proves the cache key never invalidated")
+	require.Equal(t, first.thinking, second.thinking,
+		"content streaming must not invalidate the tail-windowed thinking cache")
+	require.NotEqual(t, firstContentKey, secondContentKey,
+		"content section's tuple MUST move; otherwise this test isn't exercising a real content change")
+	require.NotEqual(t, first.content, second.content,
+		"content section must have re-rendered")
+}
+
+// TestThinkingWindow_ToggleInvalidatesOnlyThinking verifies that
+// cycling thinkingViewMode invalidates the thinking section cache
+// alone — content and error caches survive across the toggle.
+//
+// Like TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow,
+// the assertion is on the cache key tuple (width, srcHash, extra)
+// at each section, not just on rendered bytes:
+//   - thinking's tuple MUST move (extra folds in thinkingViewMode)
+//   - content's and error's tuples MUST NOT move (their keys depend
+//     only on their own source text, untouched by the toggle).
+func TestThinkingWindow_ToggleInvalidatesOnlyThinking(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const total = 1500
+	build := func() *message.Message {
+		var b strings.Builder
+		for i := 1; i <= total; i++ {
+			b.WriteString("ln")
+			b.WriteString(itoa(i))
+			if i < total {
+				b.WriteString("\n\n")
+			}
+		}
+		return &message.Message{
+			ID:   "toggle-iso",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.ReasoningContent{
+					Thinking:   b.String(),
+					StartedAt:  testStartedAt,
+					FinishedAt: testFinishedAt,
+				},
+				message.TextContent{Text: "answer text"},
+				message.Finish{
+					Reason:  message.FinishReasonError,
+					Message: "boom",
+					Details: "details",
+					Time:    testFinishTime,
+				},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build()).(*AssistantMessageItem)
+
+	const width = 101
+	_ = item.RawRender(width)
+	first := snapshot(item)
+	firstThink := keyOf(item.thinkingSec)
+	firstContent := keyOf(item.contentSec)
+	firstErr := keyOf(item.errorSec)
+	require.NotEmpty(t, first.thinking)
+	require.NotEmpty(t, first.content)
+	require.NotEmpty(t, first.errSec)
+
+	// Cycle: collapsed -> tail-window. Only thinking should change.
+	require.True(t, item.ToggleExpanded())
+	require.Equal(t, thinkingTailWindow, item.thinkingViewMode)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+	secondThink := keyOf(item.thinkingSec)
+	secondContent := keyOf(item.contentSec)
+	secondErr := keyOf(item.errorSec)
+
+	require.NotEqual(t, firstThink, secondThink,
+		"thinking section's tuple MUST move on toggle (extra folds in thinkingViewMode)")
+	require.Equal(t, firstContent, secondContent,
+		"content section's tuple must not move on a thinking toggle")
+	require.Equal(t, firstErr, secondErr,
+		"error section's tuple must not move on a thinking toggle")
+	require.NotEqual(t, first.thinking, second.thinking,
+		"toggling into tail-window must re-render the thinking section")
+	require.Equal(t, first.content, second.content,
+		"toggling thinking view-mode must not invalidate the content section")
+	require.Equal(t, first.errSec, second.errSec,
+		"toggling thinking view-mode must not invalidate the error section")
+
+	// Cycle: tail-window -> full. Same expectation.
+	require.True(t, item.ToggleExpanded())
+	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode)
+	_ = item.RawRender(width)
+	third := snapshot(item)
+	thirdThink := keyOf(item.thinkingSec)
+	thirdContent := keyOf(item.contentSec)
+	thirdErr := keyOf(item.errorSec)
+
+	require.NotEqual(t, secondThink, thirdThink,
+		"thinking section's tuple MUST move on the second toggle as well")
+	require.Equal(t, secondContent, thirdContent,
+		"content section's tuple must remain stable across the second toggle")
+	require.Equal(t, secondErr, thirdErr,
+		"error section's tuple must remain stable across the second toggle")
+	require.NotEqual(t, second.thinking, third.thinking,
+		"toggling into full expansion must re-render the thinking section")
+	require.Equal(t, second.content, third.content)
+	require.Equal(t, second.errSec, third.errSec)
+}
+
+// TestThinkingWindow_BoxHeightTracksWindow asserts that
+// thinkingBoxHeight reflects the WINDOWED render's height in
+// tail-window mode, not the (much larger) full thinking height.
+// This is what HandleMouseClick uses to detect whether a click
+// landed on the thinking box, so getting it wrong would make
+// click detection extend off the bottom of the visible box.
+func TestThinkingWindow_BoxHeightTracksWindow(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const total = 5000
+	msg := thinkingMessageWithLines("box-height", total)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 103
+
+	// Tail-window: height should be roughly the cap.
+	item.thinkingViewMode = thinkingTailWindow
+	_ = item.RawRender(width)
+	tailHeight := item.thinkingBoxHeight
+	require.Greater(t, tailHeight, 0)
+	require.LessOrEqual(t, tailHeight, maxExpandedThinkingTailLines+5,
+		"tail-window box height must reflect the windowed render, not the full thinking height; got %d",
+		tailHeight)
+
+	// Full expansion: height should grow well past the tail cap.
+	item.thinkingViewMode = thinkingFullExpanded
+	_ = item.RawRender(width)
+	fullHeight := item.thinkingBoxHeight
+	require.Greater(t, fullHeight, maxExpandedThinkingTailLines*2,
+		"full expansion box height must reflect the full thinking render; got %d",
+		fullHeight)
+}
diff --git a/internal/ui/model/chat_expand_test.go b/internal/ui/model/chat_expand_test.go
index f40e79b08512bd6f34b9f166ee661263e33e6d0a..a58c2ec64c00c0d525f301732f949de8cc658156 100644
--- a/internal/ui/model/chat_expand_test.go
+++ b/internal/ui/model/chat_expand_test.go
@@ -20,7 +20,13 @@ func TestChatToggleExpandedSelectedItem_AssistantMessage(t *testing.T) {
 
 	u := newTestUI()
 
-	msg := &message.Message{ID: "m-assist", Role: message.Assistant}
+	msg := &message.Message{
+		ID:   "m-assist",
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.ReasoningContent{Thinking: "thinking about it"},
+		},
+	}
 	item := chat.NewAssistantMessageItem(u.com.Styles, msg)
 
 	// The keyboard expand path uses the generic Expandable interface;

From 6b101f38600cd4e4b974ee56af3c9d2382cc2c43 Mon Sep 17 00:00:00 2001
From: Christian Rocha <christian@rocha.is>
Date: Tue, 12 May 2026 13:21:23 -0400
Subject: [PATCH 05/10] perf(chat): skip re-rendering chat list items that have
 not changed

The chat list now remembers what each item rendered last frame and
reuses it when nothing about that item has changed. Once a message
turn is fully done, its rendered output is reused verbatim on every
subsequent frame for as long as it stays unchanged. Resizing the
window, focusing or selecting an item, dragging a selection over it,
toggling a thinking block, and any other state change still triggers
a fresh render. The visible output is identical to before; the work
to produce it is much less.

Co-Authored-By: Charm Crush <crush@charm.land>
---
 internal/ui/chat/agent.go                     |  20 +-
 .../ui/chat/applyhighlight_callback_test.go   | 207 +++++++++
 internal/ui/chat/assistant.go                 |  32 +-
 internal/ui/chat/messages.go                  |  58 ++-
 internal/ui/chat/tools.go                     |  36 +-
 internal/ui/chat/user.go                      |  14 +-
 internal/ui/chat/version_bump_test.go         | 425 ++++++++++++++++++
 internal/ui/completions/item.go               |  36 +-
 internal/ui/completions/version_bump_test.go  |  85 ++++
 internal/ui/dialog/commands_item.go           |  35 +-
 internal/ui/dialog/models_item.go             |  31 +-
 internal/ui/dialog/reasoning.go               |  22 +-
 internal/ui/dialog/sessions_item.go           |  38 +-
 internal/ui/dialog/version_bump_test.go       | 219 +++++++++
 internal/ui/list/item.go                      |  67 ++-
 internal/ui/list/list.go                      | 222 ++++++++-
 internal/ui/list/list_test.go                 | 401 +++++++++++++++++
 internal/ui/model/layout_test.go              |   2 +
 18 files changed, 1901 insertions(+), 49 deletions(-)
 create mode 100644 internal/ui/chat/applyhighlight_callback_test.go
 create mode 100644 internal/ui/chat/version_bump_test.go
 create mode 100644 internal/ui/completions/version_bump_test.go
 create mode 100644 internal/ui/dialog/version_bump_test.go
 create mode 100644 internal/ui/list/list_test.go

diff --git a/internal/ui/chat/agent.go b/internal/ui/chat/agent.go
index 882971290d7af78e21add7663f993731c618a967..abc65e7b47c684cd1a4d8a12b0a96efaa924329e 100644
--- a/internal/ui/chat/agent.go
+++ b/internal/ui/chat/agent.go
@@ -77,9 +77,23 @@ func (a *AgentToolMessageItem) NestedTools() []ToolMessageItem {
 }
 
 // SetNestedTools sets the nested tools.
+//
+// SetNestedTools always bumps the version. The previous design
+// deduped when the slice's length and element pointers were
+// unchanged, but the live update path in internal/ui/model/ui.go
+// mutates existing children in place (SetToolCall / SetResult on the
+// same pointers) and then calls SetNestedTools with the same slice.
+// Pointer-equality dedupe in that case skips the parent Bump even
+// though the parent's rendered output (which embeds the children
+// inline) has changed, leaving a stale parent entry in the list
+// cache. Always bumping is cheap (one uint64 increment) and called
+// at most once per agent event; in the rare case the slice is
+// truly unchanged the worst case is one extra parent re-render
+// while every child cache hit stays warm.
 func (a *AgentToolMessageItem) SetNestedTools(tools []ToolMessageItem) {
 	a.nestedTools = tools
 	a.clearCache()
+	a.Bump()
 }
 
 // AddNestedTool adds a nested tool.
@@ -90,6 +104,7 @@ func (a *AgentToolMessageItem) AddNestedTool(tool ToolMessageItem) {
 	}
 	a.nestedTools = append(a.nestedTools, tool)
 	a.clearCache()
+	a.Bump()
 }
 
 // AgentToolRenderContext renders agent tool messages.
@@ -201,10 +216,12 @@ func (a *AgenticFetchToolMessageItem) NestedTools() []ToolMessageItem {
 	return a.nestedTools
 }
 
-// SetNestedTools sets the nested tools.
+// SetNestedTools sets the nested tools. Always bumps the version;
+// see [AgentToolMessageItem.SetNestedTools] for the rationale.
 func (a *AgenticFetchToolMessageItem) SetNestedTools(tools []ToolMessageItem) {
 	a.nestedTools = tools
 	a.clearCache()
+	a.Bump()
 }
 
 // AddNestedTool adds a nested tool.
@@ -215,6 +232,7 @@ func (a *AgenticFetchToolMessageItem) AddNestedTool(tool ToolMessageItem) {
 	}
 	a.nestedTools = append(a.nestedTools, tool)
 	a.clearCache()
+	a.Bump()
 }
 
 // AgenticFetchToolRenderContext renders agentic fetch tool messages.
diff --git a/internal/ui/chat/applyhighlight_callback_test.go b/internal/ui/chat/applyhighlight_callback_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..f008c0e2c74a91b277670e4e6a9f9ee6c3a0cff4
--- /dev/null
+++ b/internal/ui/chat/applyhighlight_callback_test.go
@@ -0,0 +1,207 @@
+package chat
+
+import (
+	"testing"
+
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/list"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// renderCountingItem wraps a real chat item and counts Render calls
+// to expose the list-level cache behaviour to tests. The wrapper
+// forwards the list.Item methods exercised by this test — Render,
+// Version, Finished — plus the list.Highlightable surface
+// (SetHighlight / Highlight) used by the callback-driven scenario.
+// Focus is not exercised here, so list.Focusable is not forwarded;
+// add SetFocused/IsFocused if a future test needs to drive focus
+// through the wrapper.
+type renderCountingItem struct {
+	inner       MessageItem
+	renderHits  int
+	highlightCb func(start [4]int)
+}
+
+func newRenderCountingItem(inner MessageItem) *renderCountingItem {
+	return &renderCountingItem{inner: inner}
+}
+
+func (r *renderCountingItem) Render(width int) string {
+	r.renderHits++
+	return r.inner.Render(width)
+}
+
+func (r *renderCountingItem) Version() uint64 {
+	return r.inner.(versionedItem).Version()
+}
+
+func (r *renderCountingItem) Finished() bool {
+	return r.inner.Finished()
+}
+
+// SetHighlight forwards to the embedded item; the underlying
+// highlightableMessageItem dedupes equivalent ranges and bumps the
+// shared version on observable change.
+func (r *renderCountingItem) SetHighlight(startLine, startCol, endLine, endCol int) {
+	if h, ok := r.inner.(list.Highlightable); ok {
+		h.SetHighlight(startLine, startCol, endLine, endCol)
+		if r.highlightCb != nil {
+			r.highlightCb([4]int{startLine, startCol, endLine, endCol})
+		}
+	}
+}
+
+func (r *renderCountingItem) Highlight() (int, int, int, int) {
+	if h, ok := r.inner.(list.Highlightable); ok {
+		return h.Highlight()
+	}
+	return -1, -1, -1, -1
+}
+
+// TestList_CallbackDrivenHighlightUnfreezeAndReFreeze covers F6
+// §4.5.1 along the live applyHighlightRange path. Instead of
+// driving BeginSelectionDrag directly, the test registers a render
+// callback that mutates the chat items' highlight ranges (just like
+// Chat.applyHighlightRange does in production) and verifies the
+// resulting cache behaviour:
+//
+//   - Items inside the active range pick up a SetHighlight call,
+//     their version bumps, the F6 cache invalidates, and the list
+//     re-renders them on the next draw. The post-render entry is
+//     frozen again because the items are Finished() — but their
+//     stored output now reflects the highlight.
+//   - Subsequent draws while the range is unchanged are cache hits:
+//     the callback's SetHighlight call dedupes (same range), the
+//     version is stable, and the list serves the previous output
+//     verbatim without calling Render.
+//   - When the range moves OFF an item, the callback clears the
+//     highlight, the version bumps, and the item re-renders. After
+//     that single re-render the entry re-freezes; further draws are
+//     cache hits.
+func TestList_CallbackDrivenHighlightUnfreezeAndReFreeze(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+
+	// Build three finished assistant messages so all three are
+	// candidates for freezing. Real items (per Round 2 spec) — the
+	// surrounding renderCountingItem wrapper just lets the test see
+	// per-item Render calls.
+	mk := func(id, body string) *renderCountingItem {
+		msg := &message.Message{
+			ID:   id,
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.ReasoningContent{
+					Thinking:   "thinking",
+					StartedAt:  testStartedAt,
+					FinishedAt: testFinishedAt,
+				},
+				message.TextContent{Text: body},
+				message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime},
+			},
+		}
+		inner := NewAssistantMessageItem(&sty, msg)
+		require.True(t, inner.Finished(), "test fixture must be Finished()")
+		return newRenderCountingItem(inner)
+	}
+
+	a := mk("a", "alpha")
+	b := mk("b", "bravo")
+	c := mk("c", "charlie")
+
+	l := list.NewList(a, b, c)
+	l.SetSize(80, 30)
+
+	// activeRange holds the inclusive [start, end] item indexes the
+	// callback should highlight. -1 means no active selection.
+	activeRange := [2]int{-1, -1}
+
+	cb := func(idx, _ int, item list.Item) list.Item {
+		hi, ok := item.(list.Highlightable)
+		if !ok {
+			return item
+		}
+		if activeRange[0] >= 0 && idx >= activeRange[0] && idx <= activeRange[1] {
+			// Inside the range: highlight the entire item.
+			hi.SetHighlight(0, 0, -1, -1)
+		} else {
+			// Outside the range: clear highlight.
+			hi.SetHighlight(-1, -1, -1, -1)
+		}
+		return item
+	}
+	l.RegisterRenderCallback(cb)
+
+	// First render populates the cache. Each item renders exactly
+	// once even though the callback runs for all three.
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits, "first render: a renders once")
+	require.Equal(t, 1, b.renderHits, "first render: b renders once")
+	require.Equal(t, 1, c.renderHits, "first render: c renders once")
+
+	// Subsequent renders without an active range are cache hits.
+	// The callback's SetHighlight call dedupes (already cleared),
+	// no version bump, frozen entries served verbatim.
+	for range 3 {
+		_ = l.Render()
+	}
+	require.Equal(t, 1, a.renderHits, "frozen item must not re-render across stable draws")
+	require.Equal(t, 1, b.renderHits, "frozen item must not re-render across stable draws")
+	require.Equal(t, 1, c.renderHits, "frozen item must not re-render across stable draws")
+
+	// Activate a selection range over items a and b. The callback
+	// will SetHighlight on both during the next render, bumping
+	// their versions. The cache hit fails (version mismatch) and
+	// each in-range item re-renders exactly once.
+	activeRange = [2]int{0, 1}
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "in-range item must re-render after SetHighlight")
+	require.Equal(t, 2, b.renderHits, "in-range item must re-render after SetHighlight")
+	require.Equal(t, 1, c.renderHits, "out-of-range item stays frozen")
+
+	// Verify the highlight actually landed on the in-range items.
+	sLine, _, eLine, _ := a.Highlight()
+	require.Equal(t, 0, sLine)
+	require.Equal(t, -1, eLine)
+	sLine, _, eLine, _ = c.Highlight()
+	require.Equal(t, -1, sLine, "out-of-range item must not be highlighted")
+	require.Equal(t, -1, eLine)
+
+	// While the range stays the same, subsequent renders are cache
+	// hits. The callback dedupes (same range), no version bump,
+	// the post-render entry served verbatim. Note: items are
+	// re-frozen because they're still Finished() and not in the
+	// list's freezeSuppressed set.
+	for range 3 {
+		_ = l.Render()
+	}
+	require.Equal(t, 2, a.renderHits, "in-range item re-freezes after the highlight render")
+	require.Equal(t, 2, b.renderHits, "in-range item re-freezes after the highlight render")
+	require.Equal(t, 1, c.renderHits, "out-of-range item stays frozen")
+
+	// Move the range off the items entirely. The callback clears
+	// each in-range item's highlight back to (-1,-1,-1,-1), which
+	// bumps their versions and triggers exactly one re-render
+	// each. After that, the entries re-freeze.
+	activeRange = [2]int{-1, -1}
+	_ = l.Render()
+	require.Equal(t, 3, a.renderHits, "exiting-range item must re-render once when highlight clears")
+	require.Equal(t, 3, b.renderHits, "exiting-range item must re-render once when highlight clears")
+	require.Equal(t, 1, c.renderHits, "never-highlighted item stays frozen")
+
+	// Confirm the highlight has been fully cleared.
+	sLine, _, eLine, _ = a.Highlight()
+	require.Equal(t, -1, sLine)
+	require.Equal(t, -1, eLine)
+
+	// And subsequent renders are cache hits again — the items
+	// re-froze.
+	for range 3 {
+		_ = l.Render()
+	}
+	require.Equal(t, 3, a.renderHits, "re-frozen item must not re-render across stable draws")
+	require.Equal(t, 3, b.renderHits, "re-frozen item must not re-render across stable draws")
+	require.Equal(t, 1, c.renderHits, "never-highlighted item stays frozen")
+}
diff --git a/internal/ui/chat/assistant.go b/internal/ui/chat/assistant.go
index ac2c0c88819ebe4334b76cdf9b6774ccf75adca7..e9f00020294abe2d707b645a111edf6181d424f4 100644
--- a/internal/ui/chat/assistant.go
+++ b/internal/ui/chat/assistant.go
@@ -11,6 +11,7 @@ import (
 	"github.com/charmbracelet/crush/internal/message"
 	"github.com/charmbracelet/crush/internal/ui/anim"
 	"github.com/charmbracelet/crush/internal/ui/common"
+	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/crush/internal/ui/styles"
 	"github.com/charmbracelet/x/ansi"
 )
@@ -119,6 +120,7 @@ func fnvFields(fields ...[]byte) uint64 {
 //
 // This item includes thinking, and the content but does not include the tool calls.
 type AssistantMessageItem struct {
+	*list.Versioned
 	*highlightableMessageItem
 	*cachedMessageItem
 	*focusableMessageItem
@@ -141,10 +143,12 @@ var _ Expandable = (*AssistantMessageItem)(nil)
 
 // NewAssistantMessageItem creates a new AssistantMessageItem.
 func NewAssistantMessageItem(sty *styles.Styles, message *message.Message) MessageItem {
+	v := list.NewVersioned()
 	a := &AssistantMessageItem{
-		highlightableMessageItem: defaultHighlighter(sty),
+		Versioned:                v,
+		highlightableMessageItem: defaultHighlighter(sty, v),
 		cachedMessageItem:        &cachedMessageItem{},
-		focusableMessageItem:     &focusableMessageItem{},
+		focusableMessageItem:     newFocusableMessageItem(v),
 		message:                  message,
 		sty:                      sty,
 	}
@@ -173,6 +177,13 @@ func (a *AssistantMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
 	if !a.isSpinning() {
 		return nil
 	}
+	// Bump the F6 list-cache version so the next draw re-renders
+	// this item: a spinner tick mutates anim's internal frame
+	// counter, which changes the rendered output but is invisible
+	// to the per-section content hashes. Without the bump the
+	// list cache would serve the previously rendered frame
+	// indefinitely and the spinner would appear frozen.
+	a.Bump()
 	return a.anim.Animate(msg)
 }
 
@@ -523,6 +534,12 @@ func (a *AssistantMessageItem) isSpinning() bool {
 func (a *AssistantMessageItem) SetMessage(msg *message.Message) tea.Cmd {
 	wasSpinning := a.isSpinning()
 	a.message = msg
+	// Bump the F6 version even if the underlying *message.Message
+	// pointer is identical: callers may have mutated the message in
+	// place (delta append) and we cannot tell from here. The
+	// per-section caches dedupe identical content via FNV-64 hashes,
+	// so a redundant bump only costs one list-cache repopulation.
+	a.Bump()
 	// The prefix cache is keyed by a fingerprint that includes every
 	// section's source hash, so an unchanged section keeps its prefix
 	// cache valid while a changed section forces a miss naturally.
@@ -534,6 +551,16 @@ func (a *AssistantMessageItem) SetMessage(msg *message.Message) tea.Cmd {
 	return nil
 }
 
+// Finished implements list.Item. The assistant message is freezable
+// once the message reports IsFinished() and is no longer spinning
+// (no animation tick remains pending). Streaming tail animation is
+// caught by isSpinning, so freezing only kicks in once the turn is
+// fully terminal. The list cache invalidates the entry on the next
+// version bump if anything (focus, highlight, expansion) changes.
+func (a *AssistantMessageItem) Finished() bool {
+	return a.message.IsFinished() && !a.isSpinning()
+}
+
 // clearCache drops every cached render for this item, including the
 // per-section caches. Shadows the embedded cachedMessageItem.clearCache
 // so ClearItemCaches (style change) wipes the section caches too.
@@ -572,6 +599,7 @@ func (a *AssistantMessageItem) ToggleExpanded() bool {
 	case thinkingFullExpanded:
 		a.thinkingViewMode = thinkingCollapsed
 	}
+	a.Bump()
 	return a.thinkingViewMode != thinkingCollapsed
 }
 
diff --git a/internal/ui/chat/messages.go b/internal/ui/chat/messages.go
index 3c1c5a13b7ba13a5e6990b4cc546c8813c0d5252..1e83ec6f6797a866f3587ad6f349bb4c760b1eb3 100644
--- a/internal/ui/chat/messages.go
+++ b/internal/ui/chat/messages.go
@@ -75,6 +75,12 @@ type SendMsg struct {
 }
 
 type highlightableMessageItem struct {
+	// version is the parent item's version counter. SetHighlight
+	// bumps it on every observable change so the F6 list memo and
+	// any frozen entry get invalidated when a selection drag enters
+	// or leaves the item.
+	version *list.Versioned
+
 	startLine   int
 	startCol    int
 	endLine     int
@@ -103,13 +109,20 @@ func (h *highlightableMessageItem) SetHighlight(startLine int, startCol int, end
 	// Adjust columns for the style's left inset (border + padding) since we
 	// highlight the content only.
 	offset := MessageLeftPaddingTotal
+	newStartCol := max(0, startCol-offset)
+	newEndCol := endCol
+	if endCol >= 0 {
+		newEndCol = max(0, endCol-offset)
+	}
+	if h.startLine == startLine && h.startCol == newStartCol && h.endLine == endLine && h.endCol == newEndCol {
+		return
+	}
 	h.startLine = startLine
-	h.startCol = max(0, startCol-offset)
+	h.startCol = newStartCol
 	h.endLine = endLine
-	if endCol >= 0 {
-		h.endCol = max(0, endCol-offset)
-	} else {
-		h.endCol = endCol
+	h.endCol = newEndCol
+	if h.version != nil {
+		h.version.Bump()
 	}
 }
 
@@ -118,8 +131,9 @@ func (h *highlightableMessageItem) Highlight() (startLine int, startCol int, end
 	return h.startLine, h.startCol, h.endLine, h.endCol
 }
 
-func defaultHighlighter(sty *styles.Styles) *highlightableMessageItem {
+func defaultHighlighter(sty *styles.Styles, v *list.Versioned) *highlightableMessageItem {
 	return &highlightableMessageItem{
+		version:     v,
 		startLine:   -1,
 		startCol:    -1,
 		endLine:     -1,
@@ -135,12 +149,17 @@ type cacheClearable interface {
 }
 
 // ClearItemCaches drops any cached rendered output on each item so the
-// next render uses the current styles.
+// next render uses the current styles. It also bumps each item's
+// version so the F6 list-level memo invalidates frozen entries on
+// the next render.
 func ClearItemCaches(items []MessageItem) {
 	for _, item := range items {
 		if cc, ok := item.(cacheClearable); ok {
 			cc.clearCache()
 		}
+		if v, ok := item.(interface{ Bump() }); ok {
+			v.Bump()
+		}
 	}
 }
 
@@ -216,12 +235,28 @@ func (c *cachedMessageItem) clearCache() {
 
 // focusableMessageItem is a base struct for message items that can be focused.
 type focusableMessageItem struct {
+	// version is the parent item's version counter. SetFocused
+	// bumps it whenever focus actually flips so the F6 list memo
+	// invalidates the per-line focus prefix.
+	version *list.Versioned
 	focused bool
 }
 
+// newFocusableMessageItem returns a focusableMessageItem wired to the
+// shared version counter.
+func newFocusableMessageItem(v *list.Versioned) *focusableMessageItem {
+	return &focusableMessageItem{version: v}
+}
+
 // SetFocused implements MessageItem.
 func (f *focusableMessageItem) SetFocused(focused bool) {
+	if f.focused == focused {
+		return
+	}
 	f.focused = focused
+	if f.version != nil {
+		f.version.Bump()
+	}
 }
 
 // AssistantInfoID returns a stable ID for assistant info items.
@@ -231,6 +266,7 @@ func AssistantInfoID(messageID string) string {
 
 // AssistantInfoItem renders model info and response time after assistant completes.
 type AssistantInfoItem struct {
+	*list.Versioned
 	*cachedMessageItem
 
 	id                  string
@@ -243,6 +279,7 @@ type AssistantInfoItem struct {
 // NewAssistantInfoItem creates a new AssistantInfoItem.
 func NewAssistantInfoItem(sty *styles.Styles, message *message.Message, cfg *config.Config, lastUserMessageTime time.Time) MessageItem {
 	return &AssistantInfoItem{
+		Versioned:           list.NewVersioned(),
 		cachedMessageItem:   &cachedMessageItem{},
 		id:                  AssistantInfoID(message.ID),
 		message:             message,
@@ -252,6 +289,13 @@ func NewAssistantInfoItem(sty *styles.Styles, message *message.Message, cfg *con
 	}
 }
 
+// Finished implements list.Item. Assistant info blocks render a fixed
+// model/duration footer once the assistant turn finishes; the data
+// is immutable after construction so the entry is safe to freeze.
+func (a *AssistantInfoItem) Finished() bool {
+	return true
+}
+
 // ID implements MessageItem.
 func (a *AssistantInfoItem) ID() string {
 	return a.id
diff --git a/internal/ui/chat/tools.go b/internal/ui/chat/tools.go
index 560fd2fc5312852b5cc91a228b4587f4accb0d2a..5855cbe2698e1defe4a5c8800ccbb6c71356c926 100644
--- a/internal/ui/chat/tools.go
+++ b/internal/ui/chat/tools.go
@@ -19,6 +19,7 @@ import (
 	"github.com/charmbracelet/crush/internal/stringext"
 	"github.com/charmbracelet/crush/internal/ui/anim"
 	"github.com/charmbracelet/crush/internal/ui/common"
+	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/crush/internal/ui/styles"
 	"github.com/charmbracelet/x/ansi"
 )
@@ -135,6 +136,7 @@ func (f ToolRendererFunc) RenderTool(sty *styles.Styles, width int, opts *ToolRe
 
 // baseToolMessageItem represents a tool call message that can be displayed in the UI.
 type baseToolMessageItem struct {
+	*list.Versioned
 	*highlightableMessageItem
 	*cachedMessageItem
 	*focusableMessageItem
@@ -176,10 +178,12 @@ func newBaseToolMessageItem(
 		status = ToolStatusCanceled
 	}
 
+	v := list.NewVersioned()
 	t := &baseToolMessageItem{
-		highlightableMessageItem: defaultHighlighter(sty),
+		Versioned:                v,
+		highlightableMessageItem: defaultHighlighter(sty, v),
 		cachedMessageItem:        &cachedMessageItem{},
-		focusableMessageItem:     &focusableMessageItem{},
+		focusableMessageItem:     newFocusableMessageItem(v),
 		sty:                      sty,
 		toolRenderer:             toolRenderer,
 		toolCall:                 toolCall,
@@ -270,8 +274,12 @@ func NewToolMessageItem(
 
 // SetCompact implements the Compactable interface.
 func (t *baseToolMessageItem) SetCompact(compact bool) {
+	if t.isCompact == compact {
+		return
+	}
 	t.isCompact = compact
 	t.clearCache()
+	t.Bump()
 }
 
 // ID returns the unique identifier for this tool message item.
@@ -378,12 +386,14 @@ func (t *baseToolMessageItem) ToolCall() message.ToolCall {
 func (t *baseToolMessageItem) SetToolCall(tc message.ToolCall) {
 	t.toolCall = tc
 	t.clearCache()
+	t.Bump()
 }
 
 // SetResult sets the tool result associated with this message item.
 func (t *baseToolMessageItem) SetResult(res *message.ToolResult) {
 	t.result = res
 	t.clearCache()
+	t.Bump()
 }
 
 // MessageID returns the ID of the message containing this tool call.
@@ -392,14 +402,20 @@ func (t *baseToolMessageItem) MessageID() string {
 }
 
 // SetMessageID sets the ID of the message containing this tool call.
+// MessageID is metadata only and does not affect the rendered output,
+// so we deliberately do not bump the version here.
 func (t *baseToolMessageItem) SetMessageID(id string) {
 	t.messageID = id
 }
 
 // SetStatus sets the tool status.
 func (t *baseToolMessageItem) SetStatus(status ToolStatus) {
+	if t.status == status {
+		return
+	}
 	t.status = status
 	t.clearCache()
+	t.Bump()
 }
 
 // Status returns the current tool status.
@@ -439,9 +455,25 @@ func (t *baseToolMessageItem) SetSpinningFunc(fn SpinningFunc) {
 func (t *baseToolMessageItem) ToggleExpanded() bool {
 	t.expandedContent = !t.expandedContent
 	t.clearCache()
+	t.Bump()
 	return t.expandedContent
 }
 
+// Finished implements list.Item. A tool call is freezable once the
+// tool call itself is marked finished AND a result has been recorded
+// (or it has been canceled). Tools that override the spinning logic
+// via spinningFunc would short-circuit live ticks; we still gate
+// freezing on isSpinning to keep the contract conservative.
+func (t *baseToolMessageItem) Finished() bool {
+	if t.isSpinning() {
+		return false
+	}
+	if t.status == ToolStatusCanceled {
+		return true
+	}
+	return t.toolCall.Finished && t.result != nil
+}
+
 // HandleMouseClick implements MouseClickable.
 func (t *baseToolMessageItem) HandleMouseClick(btn ansi.MouseButton, x, y int) bool {
 	return btn == ansi.MouseLeft
diff --git a/internal/ui/chat/user.go b/internal/ui/chat/user.go
index 8194955a6fa99e20a7d949651d6d1400d8f3fb72..0f11c00a68f7aec9013ce2d80b73e0ab4df9c1d9 100644
--- a/internal/ui/chat/user.go
+++ b/internal/ui/chat/user.go
@@ -8,11 +8,13 @@ import (
 	"github.com/charmbracelet/crush/internal/message"
 	"github.com/charmbracelet/crush/internal/ui/attachments"
 	"github.com/charmbracelet/crush/internal/ui/common"
+	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/crush/internal/ui/styles"
 )
 
 // UserMessageItem represents a user message in the chat UI.
 type UserMessageItem struct {
+	*list.Versioned
 	*highlightableMessageItem
 	*cachedMessageItem
 	*focusableMessageItem
@@ -24,16 +26,24 @@ type UserMessageItem struct {
 
 // NewUserMessageItem creates a new UserMessageItem.
 func NewUserMessageItem(sty *styles.Styles, message *message.Message, attachments *attachments.Renderer) MessageItem {
+	v := list.NewVersioned()
 	return &UserMessageItem{
-		highlightableMessageItem: defaultHighlighter(sty),
+		Versioned:                v,
+		highlightableMessageItem: defaultHighlighter(sty, v),
 		cachedMessageItem:        &cachedMessageItem{},
-		focusableMessageItem:     &focusableMessageItem{},
+		focusableMessageItem:     newFocusableMessageItem(v),
 		attachments:              attachments,
 		message:                  message,
 		sty:                      sty,
 	}
 }
 
+// Finished implements list.Item. User messages are immutable once
+// submitted, so the entry is always safe to freeze.
+func (m *UserMessageItem) Finished() bool {
+	return true
+}
+
 // RawRender implements [MessageItem].
 func (m *UserMessageItem) RawRender(width int) string {
 	cappedWidth := cappedMessageWidth(width)
diff --git a/internal/ui/chat/version_bump_test.go b/internal/ui/chat/version_bump_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..847f0053bb38324e7019e35294c7e9a81a9ba7a4
--- /dev/null
+++ b/internal/ui/chat/version_bump_test.go
@@ -0,0 +1,425 @@
+package chat
+
+import (
+	"testing"
+	"time"
+
+	"github.com/charmbracelet/crush/internal/config"
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/anim"
+	"github.com/charmbracelet/crush/internal/ui/attachments"
+	"github.com/charmbracelet/crush/internal/ui/list"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// versionedItem is the cross-cutting interface every chat item type
+// must satisfy under F6: every documented mutator must bump the
+// shared version counter so the list-level memo invalidates.
+type versionedItem interface {
+	list.Item
+	Version() uint64
+}
+
+// requireBump asserts that the supplied mutator advances the item's
+// Version(). The mutator runs once; an absent bump is a regression
+// (a finished item would keep serving stale frozen output to the
+// list cache).
+func requireBump(t *testing.T, name string, item versionedItem, mutate func()) {
+	t.Helper()
+	before := item.Version()
+	mutate()
+	after := item.Version()
+	require.Greaterf(t, after, before, "%s must bump Version() (before=%d, after=%d)", name, before, after)
+}
+
+// TestAssistantMessageItem_MutatorsBumpVersion enumerates every
+// documented mutator on AssistantMessageItem and asserts each one
+// advances Version().
+func TestAssistantMessageItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	build := func(thinking, content string) *message.Message {
+		parts := []message.ContentPart{
+			message.ReasoningContent{
+				Thinking:   thinking,
+				StartedAt:  testStartedAt,
+				FinishedAt: testFinishedAt,
+			},
+		}
+		if content != "" {
+			parts = append(parts, message.TextContent{Text: content})
+		}
+		return &message.Message{ID: "a-mut", Role: message.Assistant, Parts: parts}
+	}
+
+	item := NewAssistantMessageItem(&sty, build("thinking", "content")).(*AssistantMessageItem)
+
+	requireBump(t, "SetMessage", item, func() {
+		item.SetMessage(build("thinking", "more content"))
+	})
+	requireBump(t, "SetFocused", item, func() {
+		item.SetFocused(true)
+	})
+	requireBump(t, "SetHighlight", item, func() {
+		item.SetHighlight(0, 0, 0, 5)
+	})
+	// ToggleExpanded only mutates state when there is non-empty
+	// thinking text — which the build helper provides.
+	requireBump(t, "ToggleExpanded", item, func() {
+		item.ToggleExpanded()
+	})
+}
+
+// TestUserMessageItem_MutatorsBumpVersion enumerates UserMessageItem
+// mutators.
+func TestUserMessageItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	r := attachments.NewRenderer(
+		sty.Attachments.Normal,
+		sty.Attachments.Deleting,
+		sty.Attachments.Image,
+		sty.Attachments.Text,
+	)
+	msg := &message.Message{
+		ID:   "u-mut",
+		Role: message.User,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: "Hello"},
+		},
+	}
+	item := NewUserMessageItem(&sty, msg, r).(*UserMessageItem)
+
+	requireBump(t, "SetFocused", item, func() {
+		item.SetFocused(true)
+	})
+	requireBump(t, "SetHighlight", item, func() {
+		item.SetHighlight(0, 0, 0, 3)
+	})
+}
+
+// TestAssistantInfoItem_VersionedAndFinished sanity-checks the
+// AssistantInfoItem wiring. The item carries only immutable data
+// after construction; we still assert Version() is callable and
+// Finished() returns true.
+func TestAssistantInfoItem_VersionedAndFinished(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	cfg := &config.Config{}
+	msg := &message.Message{
+		ID:    "info",
+		Role:  message.Assistant,
+		Parts: []message.ContentPart{message.Finish{Reason: message.FinishReasonEndTurn, Time: time.Now().Unix()}},
+	}
+	item := NewAssistantInfoItem(&sty, msg, cfg, time.Unix(0, 0)).(*AssistantInfoItem)
+
+	require.True(t, item.Finished(), "AssistantInfoItem must be Finished()")
+	// Version() is callable and starts at zero.
+	require.Equal(t, uint64(0), item.Version())
+}
+
+// TestBaseToolMessageItem_MutatorsBumpVersion enumerates the base
+// tool item mutators. Specific tool types layer on top of this
+// base; the base bumps cover the shared mutator surface.
+func TestBaseToolMessageItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	tc := message.ToolCall{ID: "tc1", Name: "bash", Input: "{}", Finished: false}
+	item := NewToolMessageItem(&sty, "msg", tc, nil, false)
+
+	v := item.(versionedItem)
+
+	requireBump(t, "SetFocused", v, func() {
+		if f, ok := item.(list.Focusable); ok {
+			f.SetFocused(true)
+		}
+	})
+	requireBump(t, "SetHighlight", v, func() {
+		if h, ok := item.(list.Highlightable); ok {
+			h.SetHighlight(0, 0, 0, 3)
+		}
+	})
+	requireBump(t, "SetToolCall", v, func() {
+		tc2 := tc
+		tc2.Input = `{"command":"echo"}`
+		item.SetToolCall(tc2)
+	})
+	requireBump(t, "SetResult", v, func() {
+		item.SetResult(&message.ToolResult{ToolCallID: "tc1", Content: "ok"})
+	})
+	requireBump(t, "SetStatus", v, func() {
+		item.SetStatus(ToolStatusSuccess)
+	})
+	requireBump(t, "ToggleExpanded", v, func() {
+		if e, ok := item.(Expandable); ok {
+			e.ToggleExpanded()
+		}
+	})
+	requireBump(t, "SetCompact", v, func() {
+		if c, ok := item.(Compactable); ok {
+			c.SetCompact(true)
+		}
+	})
+}
+
+// TestAssistantMessageItem_AnimateBumpsVersion covers the spinner
+// regression: while the assistant message is spinning, every
+// anim.StepMsg fed through Animate must bump Version() so the
+// list-level cache invalidates and the next draw re-renders the
+// advanced spinner frame. Without this bump the cached entry's
+// version stays put and the spinner appears frozen.
+func TestAssistantMessageItem_AnimateBumpsVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	streaming := &message.Message{
+		ID:   "spin",
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.ReasoningContent{Thinking: "thinking..."},
+		},
+	}
+	item := NewAssistantMessageItem(&sty, streaming).(*AssistantMessageItem)
+
+	requireBump(t, "Animate", item, func() {
+		item.Animate(anim.StepMsg{})
+	})
+
+	// A non-spinning item must not bump on Animate: the bump only
+	// makes sense while the spinner is live, and a stray bump on a
+	// finished item would needlessly invalidate frozen entries.
+	finished := &message.Message{
+		ID:   "spin",
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: "done"},
+			message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime},
+		},
+	}
+	item.SetMessage(finished)
+	require.True(t, item.Finished(), "item must report Finished() once the message finishes")
+	before := item.Version()
+	item.Animate(anim.StepMsg{})
+	require.Equal(t, before, item.Version(), "Animate must not bump Version() on a non-spinning item")
+}
+
+// TestAssistantMessageItem_FinishedTransition covers §4.5.1: a
+// streaming assistant message reports Finished() == false; once the
+// message reports IsFinished() and stops spinning, Finished() must
+// return true.
+func TestAssistantMessageItem_FinishedTransition(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+
+	// Streaming: no finish part, no content yet — isSpinning == true.
+	streaming := &message.Message{
+		ID:   "stream",
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.ReasoningContent{Thinking: "thinking..."},
+		},
+	}
+	item := NewAssistantMessageItem(&sty, streaming).(*AssistantMessageItem)
+	require.False(t, item.Finished(), "streaming assistant message must not be Finished()")
+
+	// Finished with content.
+	finished := &message.Message{
+		ID:   "stream",
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.ReasoningContent{Thinking: "thinking", StartedAt: testStartedAt, FinishedAt: testFinishedAt},
+			message.TextContent{Text: "the answer"},
+			message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime},
+		},
+	}
+	item.SetMessage(finished)
+	require.True(t, item.Finished(), "finished assistant message must be Finished()")
+}
+
+// TestUserMessageItem_FinishedAlwaysTrue locks in the freezable
+// contract: user messages are never spinning.
+func TestUserMessageItem_FinishedAlwaysTrue(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	r := attachments.NewRenderer(
+		sty.Attachments.Normal,
+		sty.Attachments.Deleting,
+		sty.Attachments.Image,
+		sty.Attachments.Text,
+	)
+	msg := &message.Message{
+		ID:    "u-fin",
+		Role:  message.User,
+		Parts: []message.ContentPart{message.TextContent{Text: "hi"}},
+	}
+	item := NewUserMessageItem(&sty, msg, r).(*UserMessageItem)
+	require.True(t, item.Finished())
+}
+
+// TestAgentToolMessageItem_NestedToolMutatorsBumpVersion covers B1:
+// the nested-tool mutators on AgentToolMessageItem must bump
+// Version() so the list cache invalidates frozen entries when a
+// nested tool is added or the slice changes. SetNestedTools always
+// bumps unconditionally — the live update path in
+// internal/ui/model/ui.go mutates existing children in place and
+// then re-passes the same slice, so a pointer-equality dedupe would
+// hide observable child-render changes. AddNestedTool also always
+// observably mutates state and always bumps.
+func TestAgentToolMessageItem_NestedToolMutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parent := message.ToolCall{ID: "agent-parent", Name: "agent", Input: `{}`, Finished: false}
+	item := NewAgentToolMessageItem(&sty, parent, nil, false)
+
+	mkChild := func(id string) ToolMessageItem {
+		tc := message.ToolCall{ID: id, Name: "bash", Input: `{}`, Finished: false}
+		return NewToolMessageItem(&sty, "msg", tc, nil, false)
+	}
+
+	// AddNestedTool always bumps.
+	requireBump(t, "AddNestedTool", item, func() {
+		item.AddNestedTool(mkChild("c1"))
+	})
+
+	// SetNestedTools always bumps, even with a pointer-equal slice.
+	current := append([]ToolMessageItem(nil), item.NestedTools()...)
+	requireBump(t, "SetNestedTools[pointer-equal]", item, func() {
+		item.SetNestedTools(current)
+	})
+
+	// SetNestedTools with a different slice (extra element) bumps.
+	requireBump(t, "SetNestedTools[different]", item, func() {
+		item.SetNestedTools(append(current, mkChild("c2")))
+	})
+
+	// SetNestedTools to an empty slice from a non-empty state bumps.
+	requireBump(t, "SetNestedTools[empty]", item, func() {
+		item.SetNestedTools(nil)
+	})
+}
+
+// TestAgenticFetchToolMessageItem_NestedToolMutatorsBumpVersion is
+// the agentic-fetch counterpart to the agent-tool nested mutator
+// bump test above.
+func TestAgenticFetchToolMessageItem_NestedToolMutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parent := message.ToolCall{ID: "fetch-parent", Name: "agentic_fetch", Input: `{}`, Finished: false}
+	item := NewAgenticFetchToolMessageItem(&sty, parent, nil, false)
+
+	mkChild := func(id string) ToolMessageItem {
+		tc := message.ToolCall{ID: id, Name: "fetch", Input: `{}`, Finished: false}
+		return NewToolMessageItem(&sty, "msg", tc, nil, false)
+	}
+
+	requireBump(t, "AddNestedTool", item, func() {
+		item.AddNestedTool(mkChild("c1"))
+	})
+
+	current := append([]ToolMessageItem(nil), item.NestedTools()...)
+	requireBump(t, "SetNestedTools[pointer-equal]", item, func() {
+		item.SetNestedTools(current)
+	})
+
+	requireBump(t, "SetNestedTools[different]", item, func() {
+		item.SetNestedTools(append(current, mkChild("c2")))
+	})
+
+	requireBump(t, "SetNestedTools[empty]", item, func() {
+		item.SetNestedTools(nil)
+	})
+}
+
+// TestAgentToolMessageItem_NestedChildInPlaceMutationBumpsParent is
+// the T5 regression test: it mirrors the live update flow at
+// internal/ui/model/ui.go:1242-1281 where nested tool calls are
+// updated in place (SetToolCall / SetResult on the same child
+// pointers) and then the same slice is handed back to the parent
+// via SetNestedTools. The parent must still bump its version so
+// the list cache invalidates the parent's pre-rendered string and
+// the freshly-rendered child output becomes visible.
+func TestAgentToolMessageItem_NestedChildInPlaceMutationBumpsParent(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parent := message.ToolCall{ID: "agent-parent", Name: "agent", Input: `{}`, Finished: false}
+	item := NewAgentToolMessageItem(&sty, parent, nil, false)
+
+	childTC := message.ToolCall{ID: "c1", Name: "bash", Input: `{}`, Finished: false}
+	child := NewToolMessageItem(&sty, "msg", childTC, nil, false)
+	item.AddNestedTool(child)
+
+	v0 := item.Version()
+	childVersionBefore := child.(versionedItem).Version()
+
+	// In-place mutate the existing child, exactly like the live
+	// flow in ui.go:1271-1278 does.
+	child.SetResult(&message.ToolResult{ToolCallID: "c1", Content: "ok"})
+	require.Greaterf(t, child.(versionedItem).Version(), childVersionBefore,
+		"child SetResult must bump child version")
+
+	// Hand the same slice back to the parent (pointers unchanged).
+	same := item.NestedTools()
+	item.SetNestedTools(same)
+	require.Greaterf(t, item.Version(), v0,
+		"parent SetNestedTools must bump even when child pointers are unchanged (in-place child mutation invalidates parent's pre-rendered output)")
+}
+
+// TestAgenticFetchToolMessageItem_NestedChildInPlaceMutationBumpsParent
+// is the agentic-fetch counterpart of the T5 regression test.
+func TestAgenticFetchToolMessageItem_NestedChildInPlaceMutationBumpsParent(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parent := message.ToolCall{ID: "fetch-parent", Name: "agentic_fetch", Input: `{}`, Finished: false}
+	item := NewAgenticFetchToolMessageItem(&sty, parent, nil, false)
+
+	childTC := message.ToolCall{ID: "c1", Name: "fetch", Input: `{}`, Finished: false}
+	child := NewToolMessageItem(&sty, "msg", childTC, nil, false)
+	item.AddNestedTool(child)
+
+	v0 := item.Version()
+	childVersionBefore := child.(versionedItem).Version()
+
+	child.SetResult(&message.ToolResult{ToolCallID: "c1", Content: "ok"})
+	require.Greaterf(t, child.(versionedItem).Version(), childVersionBefore,
+		"child SetResult must bump child version")
+
+	same := item.NestedTools()
+	item.SetNestedTools(same)
+	require.Greaterf(t, item.Version(), v0,
+		"parent SetNestedTools must bump even when child pointers are unchanged")
+}
+
+// TestBaseToolMessageItem_FinishedTransition covers §4.5.1 for
+// tools: a still-running tool reports Finished() == false; once the
+// tool call is marked finished and a result lands, Finished()
+// returns true. Cancelled tools also become Finished.
+func TestBaseToolMessageItem_FinishedTransition(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	tc := message.ToolCall{ID: "tc-fin", Name: "bash", Input: "{}", Finished: false}
+	item := NewToolMessageItem(&sty, "msg", tc, nil, false)
+	require.False(t, item.Finished(), "running tool must not be Finished()")
+
+	tcFinished := tc
+	tcFinished.Finished = true
+	item.SetToolCall(tcFinished)
+	item.SetResult(&message.ToolResult{ToolCallID: "tc-fin", Content: "ok"})
+	require.True(t, item.Finished(), "finished tool with result must be Finished()")
+
+	// Canceled tool with no result is also Finished.
+	tcCanceled := message.ToolCall{ID: "tc-cancel", Name: "bash", Input: "{}", Finished: false}
+	canceled := NewToolMessageItem(&sty, "msg", tcCanceled, nil, true)
+	require.True(t, canceled.Finished(), "canceled tool must be Finished()")
+}
diff --git a/internal/ui/completions/item.go b/internal/ui/completions/item.go
index 3e99408dcc8e04288d5775dc01e17bcdd42a59a4..b149e58a50ab5dbe6025640fc3edd05fbc0aa246 100644
--- a/internal/ui/completions/item.go
+++ b/internal/ui/completions/item.go
@@ -1,6 +1,8 @@
 package completions
 
 import (
+	"slices"
+
 	"charm.land/lipgloss/v2"
 	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/x/ansi"
@@ -23,6 +25,8 @@ type ResourceCompletionValue struct {
 
 // CompletionItem represents an item in the completions list.
 type CompletionItem struct {
+	*list.Versioned
+
 	text    string
 	value   any
 	match   fuzzy.Match
@@ -38,6 +42,7 @@ type CompletionItem struct {
 // NewCompletionItem creates a new completion item.
 func NewCompletionItem(text string, value any, normalStyle, focusedStyle, matchStyle lipgloss.Style) *CompletionItem {
 	return &CompletionItem{
+		Versioned:    list.NewVersioned(),
 		text:         text,
 		value:        value,
 		normalStyle:  normalStyle,
@@ -46,6 +51,15 @@ func NewCompletionItem(text string, value any, normalStyle, focusedStyle, matchS
 	}
 }
 
+// Finished implements list.Item. Completion items render purely from
+// (text, match, focus); any mutation (SetMatch / SetFocused) bumps
+// Version() so the frozen cache entry invalidates on the next
+// render. Marking them finished lets the F6 list memo skip the
+// per-line work for the steady completions popup.
+func (c *CompletionItem) Finished() bool {
+	return true
+}
+
 // Text returns the display text of the item.
 func (c *CompletionItem) Text() string {
 	return c.text
@@ -63,16 +77,34 @@ func (c *CompletionItem) Filter() string {
 
 // SetMatch implements [list.MatchSettable].
 func (c *CompletionItem) SetMatch(m fuzzy.Match) {
+	if sameFuzzyMatch(c.match, m) {
+		return
+	}
 	c.cache = nil
 	c.match = m
+	c.Bump()
+}
+
+// sameFuzzyMatch reports whether two fuzzy.Match values are
+// observably equal. Because Match contains a slice (MatchedIndexes)
+// it is not directly comparable with ==; we compare the scalar
+// fields and then walk the indexes. SetMatch uses this to skip
+// gratuitous version bumps when the same match is reapplied.
+func sameFuzzyMatch(a, b fuzzy.Match) bool {
+	return a.Str == b.Str &&
+		a.Index == b.Index &&
+		a.Score == b.Score &&
+		slices.Equal(a.MatchedIndexes, b.MatchedIndexes)
 }
 
 // SetFocused implements [list.Focusable].
 func (c *CompletionItem) SetFocused(focused bool) {
-	if c.focused != focused {
-		c.cache = nil
+	if c.focused == focused {
+		return
 	}
+	c.cache = nil
 	c.focused = focused
+	c.Bump()
 }
 
 // Render implements [list.Item].
diff --git a/internal/ui/completions/version_bump_test.go b/internal/ui/completions/version_bump_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..5d3c620c1ed17aa7477754e0cef1d68aa43026ee
--- /dev/null
+++ b/internal/ui/completions/version_bump_test.go
@@ -0,0 +1,85 @@
+package completions
+
+import (
+	"testing"
+
+	"charm.land/lipgloss/v2"
+	"github.com/sahilm/fuzzy"
+	"github.com/stretchr/testify/require"
+)
+
+// TestCompletionItem_MutatorsBumpVersion covers F6 §4.5 for the
+// completions popup: SetMatch and SetFocused must bump Version()
+// when the observable state changes, and dedupe (no bump) when the
+// supplied value is identical to the current state. Without
+// dedupe, the steady completions popup would invalidate the
+// list-level memo on every keystroke that produced the same match.
+func TestCompletionItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	mkItem := func() *CompletionItem {
+		return NewCompletionItem(
+			"internal/ui/chat/user.go",
+			FileCompletionValue{Path: "internal/ui/chat/user.go"},
+			lipgloss.NewStyle(),
+			lipgloss.NewStyle(),
+			lipgloss.NewStyle(),
+		)
+	}
+
+	t.Run("SetFocused", func(t *testing.T) {
+		t.Parallel()
+		item := mkItem()
+
+		// First transition (false -> true) must bump.
+		before := item.Version()
+		item.SetFocused(true)
+		require.Greater(t, item.Version(), before, "SetFocused(true) must bump")
+
+		// Re-applying the same focus state must not bump.
+		stable := item.Version()
+		item.SetFocused(true)
+		require.Equal(t, stable, item.Version(), "SetFocused with same value must not bump")
+
+		// Transition back must bump.
+		item.SetFocused(false)
+		require.Greater(t, item.Version(), stable, "SetFocused(false) must bump")
+	})
+
+	t.Run("SetMatch", func(t *testing.T) {
+		t.Parallel()
+		item := mkItem()
+
+		match := fuzzy.Match{
+			Str:            "user",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{0, 1, 2, 3},
+		}
+		before := item.Version()
+		item.SetMatch(match)
+		require.Greater(t, item.Version(), before, "SetMatch with new value must bump")
+
+		// Re-applying an equivalent match (same fields, equal slice
+		// contents) must not bump.
+		stable := item.Version()
+		same := fuzzy.Match{
+			Str:            "user",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{0, 1, 2, 3},
+		}
+		item.SetMatch(same)
+		require.Equal(t, stable, item.Version(), "SetMatch with equivalent value must not bump")
+
+		// A different match (different MatchedIndexes) must bump.
+		different := fuzzy.Match{
+			Str:            "user",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{0, 2},
+		}
+		item.SetMatch(different)
+		require.Greater(t, item.Version(), stable, "SetMatch with different indexes must bump")
+	})
+}
diff --git a/internal/ui/dialog/commands_item.go b/internal/ui/dialog/commands_item.go
index a71229716d30876bc4e0a0d68dc45a2f27896745..8f656fb388e49eaf9d4b419142c936b4305b1945 100644
--- a/internal/ui/dialog/commands_item.go
+++ b/internal/ui/dialog/commands_item.go
@@ -3,12 +3,14 @@ package dialog
 import (
 	"strings"
 
+	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/crush/internal/ui/styles"
 	"github.com/sahilm/fuzzy"
 )
 
 // CommandItem wraps a uicmd.Command to implement the ListItem interface.
 type CommandItem struct {
+	*list.Versioned
 	id       string
 	title    string
 	shortcut string
@@ -20,19 +22,26 @@ type CommandItem struct {
 	focused  bool
 }
 
-var _ ListItem = &CommandItem{}
+var _ ListItem = &CommandItem{Versioned: list.NewVersioned()}
 
 // NewCommandItem creates a new CommandItem.
 func NewCommandItem(t *styles.Styles, id, title, shortcut string, action Action) *CommandItem {
 	return &CommandItem{
-		id:       id,
-		t:        t,
-		title:    title,
-		shortcut: shortcut,
-		action:   action,
+		Versioned: list.NewVersioned(),
+		id:        id,
+		t:         t,
+		title:     title,
+		shortcut:  shortcut,
+		action:    action,
 	}
 }
 
+// Finished implements list.Item. Command items are render-stable
+// outside of explicit SetFocused / SetMatch.
+func (c *CommandItem) Finished() bool {
+	return true
+}
+
 // WithAliases returns the CommandItem with the given aliases for filtering.
 func (c *CommandItem) WithAliases(aliases ...string) *CommandItem {
 	c.aliases = aliases
@@ -54,16 +63,26 @@ func (c *CommandItem) ID() string {
 
 // SetFocused implements ListItem.
 func (c *CommandItem) SetFocused(focused bool) {
-	if c.focused != focused {
-		c.cache = nil
+	if c.focused == focused {
+		return
 	}
+	c.cache = nil
 	c.focused = focused
+	if c.Versioned != nil {
+		c.Bump()
+	}
 }
 
 // SetMatch implements ListItem.
 func (c *CommandItem) SetMatch(m fuzzy.Match) {
+	if sameFuzzyMatch(c.m, m) {
+		return
+	}
 	c.cache = nil
 	c.m = m
+	if c.Versioned != nil {
+		c.Bump()
+	}
 }
 
 // Action returns the action associated with the command item.
diff --git a/internal/ui/dialog/models_item.go b/internal/ui/dialog/models_item.go
index 8147f8e4b56445531e7f7997dd970ca0172fcc5e..08c62b9d4150967dade66edab155280ca6b8fe68 100644
--- a/internal/ui/dialog/models_item.go
+++ b/internal/ui/dialog/models_item.go
@@ -5,6 +5,7 @@ import (
 	"charm.land/lipgloss/v2"
 	"github.com/charmbracelet/crush/internal/config"
 	"github.com/charmbracelet/crush/internal/ui/common"
+	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/crush/internal/ui/styles"
 	"github.com/charmbracelet/x/ansi"
 	"github.com/sahilm/fuzzy"
@@ -12,6 +13,7 @@ import (
 
 // ModelGroup represents a group of model items.
 type ModelGroup struct {
+	*list.Versioned
 	Title      string
 	Items      []*ModelItem
 	configured bool
@@ -21,6 +23,7 @@ type ModelGroup struct {
 // NewModelGroup creates a new ModelGroup.
 func NewModelGroup(t *styles.Styles, title string, configured bool, items ...*ModelItem) ModelGroup {
 	return ModelGroup{
+		Versioned:  list.NewVersioned(),
 		Title:      title,
 		Items:      items,
 		configured: configured,
@@ -28,6 +31,11 @@ func NewModelGroup(t *styles.Styles, title string, configured bool, items ...*Mo
 	}
 }
 
+// Finished implements list.Item. Model groups are immutable headers.
+func (m *ModelGroup) Finished() bool {
+	return true
+}
+
 // AppendItems appends [ModelItem]s to the group.
 func (m *ModelGroup) AppendItems(items ...*ModelItem) {
 	m.Items = append(m.Items, items...)
@@ -50,6 +58,8 @@ func (m *ModelGroup) Render(width int) string {
 
 // ModelItem represents a list item for a model type.
 type ModelItem struct {
+	*list.Versioned
+
 	prov      catwalk.Provider
 	model     catwalk.Model
 	modelType ModelType
@@ -61,6 +71,12 @@ type ModelItem struct {
 	showProvider bool
 }
 
+// Finished implements list.Item. Model items are render-stable
+// outside of explicit SetFocused / SetMatch.
+func (m *ModelItem) Finished() bool {
+	return true
+}
+
 // SelectedModel returns this model item as a [config.SelectedModel] instance.
 func (m *ModelItem) SelectedModel() config.SelectedModel {
 	return config.SelectedModel{
@@ -81,6 +97,7 @@ var _ ListItem = &ModelItem{}
 // NewModelItem creates a new ModelItem.
 func NewModelItem(t *styles.Styles, prov catwalk.Provider, model catwalk.Model, typ ModelType, showProvider bool) *ModelItem {
 	return &ModelItem{
+		Versioned:    list.NewVersioned(),
 		prov:         prov,
 		model:        model,
 		modelType:    typ,
@@ -117,14 +134,24 @@ func (m *ModelItem) Render(width int) string {
 
 // SetFocused implements ListItem.
 func (m *ModelItem) SetFocused(focused bool) {
-	if m.focused != focused {
-		m.cache = nil
+	if m.focused == focused {
+		return
 	}
+	m.cache = nil
 	m.focused = focused
+	if m.Versioned != nil {
+		m.Bump()
+	}
 }
 
 // SetMatch implements ListItem.
 func (m *ModelItem) SetMatch(fm fuzzy.Match) {
+	if sameFuzzyMatch(m.m, fm) {
+		return
+	}
 	m.cache = nil
 	m.m = fm
+	if m.Versioned != nil {
+		m.Bump()
+	}
 }
diff --git a/internal/ui/dialog/reasoning.go b/internal/ui/dialog/reasoning.go
index a0aa664ad150b1b9dc126e784b9114865c9f58b4..74faf4e146aaf160c02b5ca3873923e168d2bed7 100644
--- a/internal/ui/dialog/reasoning.go
+++ b/internal/ui/dialog/reasoning.go
@@ -40,6 +40,7 @@ type Reasoning struct {
 
 // ReasoningItem represents a reasoning effort list item.
 type ReasoningItem struct {
+	*list.Versioned
 	effort    string
 	title     string
 	isCurrent bool
@@ -49,6 +50,12 @@ type ReasoningItem struct {
 	focused   bool
 }
 
+// Finished implements list.Item. Reasoning items are render-stable
+// outside of explicit SetFocused / SetMatch.
+func (r *ReasoningItem) Finished() bool {
+	return true
+}
+
 var (
 	_ Dialog   = (*Reasoning)(nil)
 	_ ListItem = (*ReasoningItem)(nil)
@@ -243,6 +250,7 @@ func (r *Reasoning) setReasoningItems() error {
 	selectedIndex := 0
 	for i, effort := range model.ReasoningLevels {
 		item := &ReasoningItem{
+			Versioned: list.NewVersioned(),
 			effort:    effort,
 			title:     common.FormatReasoningEffort(effort),
 			isCurrent: effort == currentEffort,
@@ -272,16 +280,26 @@ func (r *ReasoningItem) ID() string {
 
 // SetFocused sets the focus state of the reasoning item.
 func (r *ReasoningItem) SetFocused(focused bool) {
-	if r.focused != focused {
-		r.cache = nil
+	if r.focused == focused {
+		return
 	}
+	r.cache = nil
 	r.focused = focused
+	if r.Versioned != nil {
+		r.Bump()
+	}
 }
 
 // SetMatch sets the fuzzy match for the reasoning item.
 func (r *ReasoningItem) SetMatch(m fuzzy.Match) {
+	if sameFuzzyMatch(r.m, m) {
+		return
+	}
 	r.cache = nil
 	r.m = m
+	if r.Versioned != nil {
+		r.Bump()
+	}
 }
 
 // Render returns the string representation of the reasoning item.
diff --git a/internal/ui/dialog/sessions_item.go b/internal/ui/dialog/sessions_item.go
index 0de64efa667d955eef4869349b6ac060704d235e..b7b8b00d3f3f914db7dc6cb13571ad6bd9899bed 100644
--- a/internal/ui/dialog/sessions_item.go
+++ b/internal/ui/dialog/sessions_item.go
@@ -2,6 +2,7 @@ package dialog
 
 import (
 	"fmt"
+	"slices"
 	"strings"
 	"time"
 
@@ -17,6 +18,19 @@ import (
 	"github.com/sahilm/fuzzy"
 )
 
+// sameFuzzyMatch reports whether two fuzzy.Match values are
+// observably equal. Because Match contains a slice (MatchedIndexes)
+// it is not directly comparable with ==; we compare the scalar
+// fields and then walk the indexes. The dialog list items use this
+// to skip gratuitous version bumps when SetMatch reapplies the same
+// match.
+func sameFuzzyMatch(a, b fuzzy.Match) bool {
+	return a.Str == b.Str &&
+		a.Index == b.Index &&
+		a.Score == b.Score &&
+		slices.Equal(a.MatchedIndexes, b.MatchedIndexes)
+}
+
 // ListItem represents a selectable and searchable item in a dialog list.
 type ListItem interface {
 	list.FilterableItem
@@ -29,6 +43,7 @@ type ListItem interface {
 
 // SessionItem wraps a [session.Session] to implement the [ListItem] interface.
 type SessionItem struct {
+	*list.Versioned
 	session.Session
 	t                *styles.Styles
 	sessionsMode     sessionsMode
@@ -38,6 +53,13 @@ type SessionItem struct {
 	focused          bool
 }
 
+// Finished implements list.Item. Session items are render-stable
+// outside of explicit SetFocused / SetMatch calls, both of which
+// bump Version() and therefore invalidate the F6 frozen entry.
+func (s *SessionItem) Finished() bool {
+	return true
+}
+
 var _ ListItem = &SessionItem{}
 
 // Filter returns the filterable value of the session.
@@ -52,8 +74,14 @@ func (s *SessionItem) ID() string {
 
 // SetMatch sets the fuzzy match for the session item.
 func (s *SessionItem) SetMatch(m fuzzy.Match) {
+	if sameFuzzyMatch(s.m, m) {
+		return
+	}
 	s.cache = nil
 	s.m = m
+	if s.Versioned != nil {
+		s.Bump()
+	}
 }
 
 // InputValue returns the updated title value
@@ -177,10 +205,14 @@ func renderItem(t ListItemStyles, title string, info string, focused bool, width
 
 // SetFocused sets the focus state of the session item.
 func (s *SessionItem) SetFocused(focused bool) {
-	if s.focused != focused {
-		s.cache = nil
+	if s.focused == focused {
+		return
 	}
+	s.cache = nil
 	s.focused = focused
+	if s.Versioned != nil {
+		s.Bump()
+	}
 }
 
 // sessionItems takes a slice of [session.Session]s and convert them to a slice
@@ -188,7 +220,7 @@ func (s *SessionItem) SetFocused(focused bool) {
 func sessionItems(t *styles.Styles, mode sessionsMode, sessions ...session.Session) []list.FilterableItem {
 	items := make([]list.FilterableItem, len(sessions))
 	for i, s := range sessions {
-		item := &SessionItem{Session: s, t: t, sessionsMode: mode}
+		item := &SessionItem{Versioned: list.NewVersioned(), Session: s, t: t, sessionsMode: mode}
 		if mode == sessionsModeUpdating {
 			item.updateTitleInput = textinput.New()
 			item.updateTitleInput.SetVirtualCursor(false)
diff --git a/internal/ui/dialog/version_bump_test.go b/internal/ui/dialog/version_bump_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..111ff23526e07bf7800488cdaa49bf84dbd758f9
--- /dev/null
+++ b/internal/ui/dialog/version_bump_test.go
@@ -0,0 +1,219 @@
+package dialog
+
+import (
+	"testing"
+
+	"charm.land/catwalk/pkg/catwalk"
+	"github.com/charmbracelet/crush/internal/session"
+	"github.com/charmbracelet/crush/internal/ui/list"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/sahilm/fuzzy"
+	"github.com/stretchr/testify/require"
+)
+
+// versionedItem is the cross-cutting interface every dialog list
+// item must satisfy under F6: every documented mutator must bump
+// the shared version counter so the list-level memo invalidates
+// frozen entries.
+type versionedItem interface {
+	list.Item
+	Version() uint64
+}
+
+// requireBump asserts that running mutate() advances the item's
+// Version().
+func requireBump(t *testing.T, name string, item versionedItem, mutate func()) {
+	t.Helper()
+	before := item.Version()
+	mutate()
+	after := item.Version()
+	require.Greaterf(t, after, before, "%s must bump Version() (before=%d, after=%d)", name, before, after)
+}
+
+// requireNoBump asserts that running mutate() leaves the item's
+// Version() unchanged. Used to lock in the dedupe contract: a
+// mutator called with a value identical to the current state must
+// not gratuitously invalidate the list cache.
+func requireNoBump(t *testing.T, name string, item versionedItem, mutate func()) {
+	t.Helper()
+	before := item.Version()
+	mutate()
+	after := item.Version()
+	require.Equalf(t, before, after, "%s must NOT bump Version() when state is unchanged (before=%d, after=%d)", name, before, after)
+}
+
+// equivMatch returns a fuzzy.Match whose fields and indexes are
+// equivalent to the supplied seed but allocated as a fresh struct
+// so callers exercise the value-equality dedupe path rather than
+// referential equality.
+func equivMatch(seed fuzzy.Match) fuzzy.Match {
+	return fuzzy.Match{
+		Str:            seed.Str,
+		Index:          seed.Index,
+		Score:          seed.Score,
+		MatchedIndexes: append([]int(nil), seed.MatchedIndexes...),
+	}
+}
+
+// TestCommandItem_MutatorsBumpVersion covers F6 §4.5 for the
+// commands palette items: SetFocused and SetMatch bump Version()
+// on observable change and dedupe otherwise.
+func TestCommandItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	item := NewCommandItem(&sty, "id", "Title", "ctrl+t", nil)
+
+	requireBump(t, "SetFocused[true]", item, func() {
+		item.SetFocused(true)
+	})
+	requireNoBump(t, "SetFocused[true again]", item, func() {
+		item.SetFocused(true)
+	})
+	requireBump(t, "SetFocused[false]", item, func() {
+		item.SetFocused(false)
+	})
+
+	match := fuzzy.Match{
+		Str:            "Title",
+		Index:          0,
+		Score:          5,
+		MatchedIndexes: []int{0, 1, 2},
+	}
+	requireBump(t, "SetMatch[new]", item, func() {
+		item.SetMatch(match)
+	})
+	requireNoBump(t, "SetMatch[same]", item, func() {
+		item.SetMatch(equivMatch(match))
+	})
+	requireBump(t, "SetMatch[different]", item, func() {
+		item.SetMatch(fuzzy.Match{
+			Str:            "Title",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{0, 2},
+		})
+	})
+}
+
+// TestModelItem_MutatorsBumpVersion covers F6 §4.5 for the model
+// picker items.
+func TestModelItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	prov := catwalk.Provider{ID: "openai", Name: "OpenAI"}
+	model := catwalk.Model{ID: "gpt-4", Name: "GPT-4"}
+	item := NewModelItem(&sty, prov, model, ModelTypeLarge, true)
+
+	requireBump(t, "SetFocused[true]", item, func() {
+		item.SetFocused(true)
+	})
+	requireNoBump(t, "SetFocused[true again]", item, func() {
+		item.SetFocused(true)
+	})
+
+	match := fuzzy.Match{
+		Str:            "GPT-4",
+		Index:          0,
+		Score:          5,
+		MatchedIndexes: []int{0, 1, 2},
+	}
+	requireBump(t, "SetMatch[new]", item, func() {
+		item.SetMatch(match)
+	})
+	requireNoBump(t, "SetMatch[same]", item, func() {
+		item.SetMatch(equivMatch(match))
+	})
+	requireBump(t, "SetMatch[different]", item, func() {
+		item.SetMatch(fuzzy.Match{
+			Str:            "GPT-4",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{1},
+		})
+	})
+}
+
+// TestSessionItem_MutatorsBumpVersion covers F6 §4.5 for the
+// sessions dialog items.
+func TestSessionItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	item := &SessionItem{
+		Versioned: list.NewVersioned(),
+		Session:   session.Session{ID: "sess-1", Title: "My Session"},
+		t:         &sty,
+	}
+
+	requireBump(t, "SetFocused[true]", item, func() {
+		item.SetFocused(true)
+	})
+	requireNoBump(t, "SetFocused[true again]", item, func() {
+		item.SetFocused(true)
+	})
+
+	match := fuzzy.Match{
+		Str:            "My Session",
+		Index:          0,
+		Score:          5,
+		MatchedIndexes: []int{0, 1, 2},
+	}
+	requireBump(t, "SetMatch[new]", item, func() {
+		item.SetMatch(match)
+	})
+	requireNoBump(t, "SetMatch[same]", item, func() {
+		item.SetMatch(equivMatch(match))
+	})
+	requireBump(t, "SetMatch[different]", item, func() {
+		item.SetMatch(fuzzy.Match{
+			Str:            "My Session",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{3, 4},
+		})
+	})
+}
+
+// TestReasoningItem_MutatorsBumpVersion covers F6 §4.5 for the
+// reasoning effort dialog items.
+func TestReasoningItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	item := &ReasoningItem{
+		Versioned: list.NewVersioned(),
+		effort:    "medium",
+		title:     "Medium",
+		t:         &sty,
+	}
+
+	requireBump(t, "SetFocused[true]", item, func() {
+		item.SetFocused(true)
+	})
+	requireNoBump(t, "SetFocused[true again]", item, func() {
+		item.SetFocused(true)
+	})
+
+	match := fuzzy.Match{
+		Str:            "Medium",
+		Index:          0,
+		Score:          5,
+		MatchedIndexes: []int{0, 1, 2},
+	}
+	requireBump(t, "SetMatch[new]", item, func() {
+		item.SetMatch(match)
+	})
+	requireNoBump(t, "SetMatch[same]", item, func() {
+		item.SetMatch(equivMatch(match))
+	})
+	requireBump(t, "SetMatch[different]", item, func() {
+		item.SetMatch(fuzzy.Match{
+			Str:            "Medium",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{2, 3},
+		})
+	})
+}
diff --git a/internal/ui/list/item.go b/internal/ui/list/item.go
index 7ac87212889dbc58773b409b5a4a96ec47d1fede..68c34a896cffbf8fdd839e4dc2b7da43af6fe679 100644
--- a/internal/ui/list/item.go
+++ b/internal/ui/list/item.go
@@ -7,10 +7,66 @@ import (
 )
 
 // Item represents a single item in the lazy-loaded list.
+//
+// Items participate in the list-level render memo (F6). The cache key
+// for each item is (pointer, width, version). Items must:
+//
+//   - Bump their version (via the embedded *Versioned helper) on every
+//     mutation that changes the rendered output.
+//   - Return Finished() == true once their rendered output will not
+//     change again unless an explicit mutator is invoked. Frozen
+//     entries are emitted verbatim — no Render call — until either
+//     Version() bumps, the viewport width changes, or the list
+//     explicitly invalidates the entry.
 type Item interface {
 	// Render returns the string representation of the item for the given
 	// width.
 	Render(width int) string
+
+	// Version returns a monotonic counter that the list-level cache
+	// uses to detect mutations. Items must increment the version
+	// (via Versioned.Bump) on every state change that would alter
+	// the rendered output.
+	Version() uint64
+
+	// Finished reports whether the item's rendered output has reached
+	// a terminal state and may be frozen by the list cache. Items
+	// that animate, stream, or otherwise still mutate must return
+	// false. A finished item that later mutates must bump its
+	// version on the mutation; the cache treats version bumps as
+	// implicit unfreeze + invalidate.
+	Finished() bool
+}
+
+// Versioned is a tiny embeddable helper that satisfies Item.Version()
+// and provides a Bump() method to call from every state-mutating
+// method. Items typically embed *Versioned alongside their other
+// helpers; see chat.AssistantMessageItem for the canonical wiring.
+//
+// Bump() is not safe for concurrent use; callers must hold whatever
+// synchronization their item type already requires for state
+// mutations. The list itself never reads Version() from a goroutine
+// other than the UI thread.
+type Versioned struct {
+	v uint64
+}
+
+// NewVersioned returns a fresh *Versioned at version zero.
+func NewVersioned() *Versioned {
+	return &Versioned{}
+}
+
+// Version returns the current version counter.
+func (vc *Versioned) Version() uint64 {
+	return vc.v
+}
+
+// Bump advances the version counter by one. Mutators on items that
+// affect the rendered output must call Bump exactly once per
+// observable state change. Bumping more than once per change is
+// harmless other than a single extra cache miss.
+func (vc *Versioned) Bump() {
+	vc.v++
 }
 
 // RawRenderable represents an item that can provide a raw rendering
@@ -45,13 +101,15 @@ type MouseClickable interface {
 
 // SpacerItem is a spacer item that adds vertical space in the list.
 type SpacerItem struct {
+	*Versioned
 	Height int
 }
 
 // NewSpacerItem creates a new [SpacerItem] with the specified height.
 func NewSpacerItem(height int) *SpacerItem {
 	return &SpacerItem{
-		Height: max(0, height-1),
+		Versioned: NewVersioned(),
+		Height:    max(0, height-1),
 	}
 }
 
@@ -59,3 +117,10 @@ func NewSpacerItem(height int) *SpacerItem {
 func (s *SpacerItem) Render(width int) string {
 	return strings.Repeat("\n", s.Height)
 }
+
+// Finished implements Item. SpacerItems are immutable in practice and
+// safe to freeze; any mutation goes through Versioned.Bump which
+// invalidates the frozen entry.
+func (s *SpacerItem) Finished() bool {
+	return true
+}
diff --git a/internal/ui/list/list.go b/internal/ui/list/list.go
index f6c3fdc44501923f37a1943fa69d4432a6d4720c..a4e30bac2dcd02013910551b68a71740c8987d21 100644
--- a/internal/ui/list/list.go
+++ b/internal/ui/list/list.go
@@ -33,9 +33,40 @@ type List struct {
 
 	// renderCallbacks is a list of callbacks to apply when rendering items.
 	renderCallbacks []func(idx, selectedIdx int, item Item) Item
+
+	// cache is the F6 list-level render memo, keyed by item pointer.
+	// Each entry stores the rendered content, a pre-split slice of
+	// lines (so AtBottom / Render / VisibleItemIndices /
+	// findItemAtY all share one render per frame), the height, and
+	// the keys that govern invalidation (width and version). The
+	// frozen flag mirrors §4.5.1: once a Finished() item is
+	// rendered, subsequent draws return the stored output verbatim
+	// without calling back into Render.
+	cache map[Item]*listCacheEntry
+
+	// freezeSuppressed marks items the list must not freeze on the
+	// next render even when their Finished() reports true. This is
+	// the §4.5.1 selection-drag escape hatch (option (a)): items
+	// inside an active selection range render as live items so that
+	// per-line highlight overlays land on the latest content. Cleared
+	// on EndSelectionDrag.
+	freezeSuppressed map[Item]struct{}
+}
+
+// listCacheEntry is the per-item entry in the list-level render memo.
+type listCacheEntry struct {
+	width   int
+	version uint64
+	frozen  bool
+	content string
+	lines   []string
+	height  int
 }
 
-// renderedItem holds the rendered content and height of an item.
+// renderedItem is the legacy view of a cached entry returned by getItem.
+// Internal callers that don't need the line slice keep using this
+// shape; functions that walk lines (Render) take the slice off the
+// cache entry directly.
 type renderedItem struct {
 	content string
 	height  int
@@ -46,6 +77,8 @@ func NewList(items ...Item) *List {
 	l := new(List)
 	l.items = items
 	l.selectedIdx = -1
+	l.cache = make(map[Item]*listCacheEntry)
+	l.freezeSuppressed = make(map[Item]struct{})
 	return l
 }
 
@@ -59,8 +92,13 @@ func (l *List) RegisterRenderCallback(cb RenderCallback) {
 	l.renderCallbacks = append(l.renderCallbacks, cb)
 }
 
-// SetSize sets the size of the list viewport.
+// SetSize sets the size of the list viewport. A width change drops the
+// entire render cache because every entry's wrapped output depends on
+// width; a height-only change is a no-op for the cache.
 func (l *List) SetSize(width, height int) {
+	if l.width != width {
+		l.invalidateAll()
+	}
 	l.width = width
 	l.height = height
 }
@@ -144,12 +182,42 @@ func (l *List) lastOffsetItem() (int, int, int) {
 }
 
 // getItem renders (if needed) and returns the item at the given index.
+// The result is served from the F6 cache when possible — see
+// renderItemEntry for the cache-key semantics.
 func (l *List) getItem(idx int) renderedItem {
 	if idx < 0 || idx >= len(l.items) {
 		return renderedItem{}
 	}
+	entry := l.renderItemEntry(idx)
+	if entry == nil {
+		return renderedItem{}
+	}
+	return renderedItem{content: entry.content, height: entry.height}
+}
+
+// renderItemEntry returns the cache entry for the given index, populating
+// the cache on miss. The result must not be retained past the next
+// invalidation (SetSize width change, SetItems, etc.).
+//
+// Render callbacks always run, even for frozen entries: callbacks
+// are how the list discovers per-frame state changes (selection,
+// highlight range) and they bump the item's version when those
+// changes affect the rendered output. A frozen item whose callback
+// run is a no-op (same focus, same highlight) keeps its stored
+// version and the cache hit is preserved on the post-callback
+// version check.
+func (l *List) renderItemEntry(idx int) *listCacheEntry {
+	if idx < 0 || idx >= len(l.items) {
+		return nil
+	}
 
-	item := l.items[idx]
+	rawItem := l.items[idx]
+	entry := l.cache[rawItem]
+
+	// Run render callbacks. Callbacks may mutate the item (focus,
+	// highlight) which in turn bumps its version when state actually
+	// changes. We capture the post-callback version below.
+	item := rawItem
 	if len(l.renderCallbacks) > 0 {
 		for _, cb := range l.renderCallbacks {
 			if it := cb(idx, l.selectedIdx, item); it != nil {
@@ -158,15 +226,128 @@ func (l *List) getItem(idx int) renderedItem {
 		}
 	}
 
+	version := rawItem.Version()
+	if entry != nil && entry.width == l.width && entry.version == version {
+		// Cache hit — frozen or unfrozen, the entry content is
+		// still correct because no version bump landed since the
+		// last render. Selection-drag suppression turns this into
+		// a miss only if the entry is frozen.
+		if !entry.frozen {
+			return entry
+		}
+		if _, suppressed := l.freezeSuppressed[rawItem]; !suppressed {
+			return entry
+		}
+	}
+
 	rendered := item.Render(l.width)
 	rendered = strings.TrimRight(rendered, "\n")
-	height := strings.Count(rendered, "\n") + 1
-	ri := renderedItem{
-		content: rendered,
-		height:  height,
+	lines := strings.Split(rendered, "\n")
+	height := len(lines)
+
+	// Re-read the version after Render so that any version bumps
+	// caused by Render itself (e.g. an item that mutates internal
+	// state during rendering) are captured. Without this we would
+	// freeze a stale entry under the post-render version.
+	finalVersion := rawItem.Version()
+
+	frozen := false
+	if rawItem.Finished() {
+		if _, suppressed := l.freezeSuppressed[rawItem]; !suppressed {
+			frozen = true
+		}
+	}
+
+	if entry == nil {
+		entry = &listCacheEntry{}
+		l.cache[rawItem] = entry
+	}
+	entry.width = l.width
+	entry.version = finalVersion
+	entry.frozen = frozen
+	entry.content = rendered
+	entry.lines = lines
+	entry.height = height
+	return entry
+}
+
+// invalidateAll drops every cache entry. Called on width changes.
+func (l *List) invalidateAll() {
+	for k := range l.cache {
+		delete(l.cache, k)
 	}
+}
 
-	return ri
+// Invalidate drops the cache entry for the given item, forcing a
+// re-render on the next getItem call. No-op if the item is not in
+// the cache.
+func (l *List) Invalidate(item Item) {
+	delete(l.cache, item)
+}
+
+// InvalidateFrozen drops the frozen flag (and stored content) for the
+// given item. Equivalent to Invalidate but exposed under the F6
+// frozen-items vocabulary so external callers can express intent.
+func (l *List) InvalidateFrozen(item Item) {
+	delete(l.cache, item)
+}
+
+// retainCacheFor drops every cache entry whose key is not in the given
+// item set. Used by SetItems to keep entries for stable items while
+// dropping entries for removed ones.
+func (l *List) retainCacheFor(items []Item) {
+	if len(l.cache) == 0 {
+		return
+	}
+	keep := make(map[Item]struct{}, len(items))
+	for _, it := range items {
+		keep[it] = struct{}{}
+	}
+	for k := range l.cache {
+		if _, ok := keep[k]; !ok {
+			delete(l.cache, k)
+		}
+	}
+}
+
+// BeginSelectionDrag marks the items in the inclusive [startIdx, endIdx]
+// range as un-freezable for the duration of an active selection drag.
+// Frozen entries inside the range are dropped so the next render
+// reflects live selection-overlay output. The corresponding
+// EndSelectionDrag clears the suppression set and lets items
+// re-freeze on their next render. Indices outside the items slice
+// are clipped silently.
+func (l *List) BeginSelectionDrag(startIdx, endIdx int) {
+	if len(l.items) == 0 {
+		return
+	}
+	if startIdx > endIdx {
+		startIdx, endIdx = endIdx, startIdx
+	}
+	startIdx = max(startIdx, 0)
+	endIdx = min(endIdx, len(l.items)-1)
+	for i := startIdx; i <= endIdx; i++ {
+		it := l.items[i]
+		l.freezeSuppressed[it] = struct{}{}
+		// Drop any cached frozen entry so the next render rebuilds
+		// it as a live (un-frozen) entry that picks up the
+		// selection overlay.
+		if entry, ok := l.cache[it]; ok && entry.frozen {
+			delete(l.cache, it)
+		}
+	}
+}
+
+// EndSelectionDrag clears the selection-drag freeze suppression. Items
+// inside the previous range will re-freeze on their next render once
+// their Finished() reports true again.
+func (l *List) EndSelectionDrag() {
+	for k := range l.freezeSuppressed {
+		delete(l.freezeSuppressed, k)
+		// Drop the cache entry so the next render produces a clean
+		// (un-highlighted) frozen entry.
+		delete(l.cache, k)
+	}
 }
 
 // ScrollToIndex scrolls the list to the given item index.
@@ -288,8 +469,11 @@ func (l *List) Render() string {
 	linesNeeded := l.height
 
 	for linesNeeded > 0 && currentIdx < len(l.items) {
-		item := l.getItem(currentIdx)
-		itemLines := strings.Split(item.content, "\n")
+		entry := l.renderItemEntry(currentIdx)
+		if entry == nil {
+			break
+		}
+		itemLines := entry.lines
 		itemHeight := len(itemLines)
 
 		if currentOffset >= 0 && currentOffset < itemHeight {
@@ -348,18 +532,15 @@ func (l *List) PrependItems(items ...Item) {
 	}
 }
 
-// SetItems sets the items in the list.
+// SetItems sets the items in the list. Cache entries for items that
+// remain after the swap are preserved; entries for removed items are
+// dropped.
 func (l *List) SetItems(items ...Item) {
-	l.setItems(true, items...)
-}
-
-// setItems sets the items in the list. If evict is true, it clears the
-// rendered item cache.
-func (l *List) setItems(evict bool, items ...Item) {
 	l.items = items
 	l.selectedIdx = min(l.selectedIdx, len(l.items)-1)
 	l.offsetIdx = min(l.offsetIdx, len(l.items)-1)
 	l.offsetLine = 0
+	l.retainCacheFor(items)
 }
 
 // AppendItems appends items to the list.
@@ -373,9 +554,16 @@ func (l *List) RemoveItem(idx int) {
 		return
 	}
 
+	removed := l.items[idx]
+
 	// Remove the item
 	l.items = append(l.items[:idx], l.items[idx+1:]...)
 
+	// Drop the cache entry for the removed item; entries for stable
+	// items stay valid because they are keyed by pointer, not index.
+	delete(l.cache, removed)
+	delete(l.freezeSuppressed, removed)
+
 	// Adjust selection if needed
 	if l.selectedIdx == idx {
 		l.selectedIdx = -1
diff --git a/internal/ui/list/list_test.go b/internal/ui/list/list_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..5f672b3fcd00cb7175c54e4e8415e1b4224f3a11
--- /dev/null
+++ b/internal/ui/list/list_test.go
@@ -0,0 +1,401 @@
+package list
+
+import (
+	"strconv"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// trackedItem is a test helper that counts Render calls. The body of
+// Render is the item's content concatenated with the call counter so
+// that "served from cache" vs "freshly rendered" is observable from
+// the rendered string itself.
+type trackedItem struct {
+	*Versioned
+	id         string
+	body       string
+	finished   bool
+	renderHits int
+}
+
+func newTrackedItem(id, body string, finished bool) *trackedItem {
+	return &trackedItem{
+		Versioned: NewVersioned(),
+		id:        id,
+		body:      body,
+		finished:  finished,
+	}
+}
+
+func (t *trackedItem) Render(width int) string {
+	t.renderHits++
+	return t.body + ":w=" + strconv.Itoa(width)
+}
+
+func (t *trackedItem) Finished() bool {
+	return t.finished
+}
+
+// TestList_RenderMemo_PointerKey covers the F6 invariant that the
+// list-level cache is keyed by item pointer, not slice index, so
+// PrependItems and AppendItems do not shift cached entries to the
+// wrong item.
+func TestList_RenderMemo_PointerKey(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false)
+	b := newTrackedItem("b", "bravo", false)
+	c := newTrackedItem("c", "charlie", false)
+
+	l := NewList(a, b, c)
+	l.SetSize(40, 10)
+
+	// First render populates the cache for every item.
+	first := l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	// Prepending a new item must not shift the existing entries to
+	// the wrong key. The existing items render exactly once more
+	// only if their cache was lost, which would be a bug. Scroll to
+	// the top so the prepended item is visible and gets rendered.
+	z := newTrackedItem("z", "zulu", false)
+	l.PrependItems(z)
+	l.ScrollToTop()
+	_ = l.Render()
+	require.Equal(t, 1, z.renderHits, "prepended item rendered once")
+	require.Equal(t, 1, a.renderHits, "stable item must keep its cached entry across PrependItems")
+	require.Equal(t, 1, b.renderHits, "stable item must keep its cached entry across PrependItems")
+	require.Equal(t, 1, c.renderHits, "stable item must keep its cached entry across PrependItems")
+
+	// AppendItems is symmetric.
+	d := newTrackedItem("d", "delta", false)
+	l.AppendItems(d)
+	_ = l.Render()
+	require.Equal(t, 1, d.renderHits, "appended item rendered once")
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	// The output is non-trivial.
+	require.Contains(t, first, "alpha")
+}
+
+// TestList_SetSize_WidthChangeInvalidates covers the F6 invariant
+// that a width change drops every cached entry but a height-only
+// change leaves the cache intact.
+func TestList_SetSize_WidthChangeInvalidates(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false)
+	b := newTrackedItem("b", "bravo", false)
+
+	l := NewList(a, b)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+
+	// Height-only change: no invalidation.
+	l.SetSize(40, 20)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits, "height-only change must keep cache entries")
+	require.Equal(t, 1, b.renderHits, "height-only change must keep cache entries")
+
+	// Width change: every entry invalidates.
+	l.SetSize(80, 20)
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "width change must invalidate cache entries")
+	require.Equal(t, 2, b.renderHits, "width change must invalidate cache entries")
+}
+
+// TestList_RemoveItem_DropsEntry covers the F6 invariant that
+// RemoveItem drops the cache entry for the removed item but leaves
+// the surviving entries in place.
+func TestList_RemoveItem_DropsEntry(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false)
+	b := newTrackedItem("b", "bravo", false)
+	c := newTrackedItem("c", "charlie", false)
+
+	l := NewList(a, b, c)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	l.RemoveItem(1) // remove b
+	_ = l.Render()
+	// a and c still cached.
+	require.Equal(t, 1, a.renderHits, "stable item must keep cached entry across RemoveItem")
+	require.Equal(t, 1, c.renderHits, "stable item must keep cached entry across RemoveItem")
+	// The removed item's entry is dropped — verify by re-adding b
+	// and confirming it renders as if fresh.
+	l.AppendItems(b)
+	_ = l.Render()
+	require.Equal(t, 2, b.renderHits, "re-added item must re-render")
+}
+
+// TestList_FrozenItem_NotReRendered covers §4.5.1: items that report
+// Finished() == true on entry creation are marked frozen after the
+// first render and are never re-rendered until width change or
+// version bump.
+func TestList_FrozenItem_NotReRendered(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", true)
+	b := newTrackedItem("b", "bravo", true)
+
+	l := NewList(a, b)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits, "frozen items render exactly once on first draw")
+	require.Equal(t, 1, b.renderHits, "frozen items render exactly once on first draw")
+
+	// Many subsequent renders must not re-render frozen items.
+	for range 5 {
+		_ = l.Render()
+	}
+	require.Equal(t, 1, a.renderHits, "frozen items must not re-render across redraws")
+	require.Equal(t, 1, b.renderHits, "frozen items must not re-render across redraws")
+}
+
+// TestList_FrozenItem_TransitionsAfterFinish covers §4.5.1: a
+// streaming item that later reports Finished() == true transitions
+// to frozen on the first render after finish.
+func TestList_FrozenItem_TransitionsAfterFinish(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false) // streaming
+	l := NewList(a)
+	l.SetSize(40, 10)
+
+	// While unfinished, every render rebuilds the cache because the
+	// item's Finished() is false.
+	for range 3 {
+		// Bump the version to simulate a streaming delta.
+		a.Bump()
+		_ = l.Render()
+	}
+	require.Equal(t, 3, a.renderHits)
+
+	// Item finishes; on the next render it freezes.
+	a.finished = true
+	a.Bump()
+	_ = l.Render()
+	require.Equal(t, 4, a.renderHits, "post-finish render still happens once")
+
+	for range 5 {
+		_ = l.Render()
+	}
+	require.Equal(t, 4, a.renderHits, "frozen after finish, no further renders")
+}
+
+// TestList_FrozenItem_VersionBumpUnfreezes covers §4.5.1: a frozen
+// item that gets a version bump (unexpectedly mutated) is unfrozen
+// and re-rendered — no stale output.
+func TestList_FrozenItem_VersionBumpUnfreezes(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", true)
+	l := NewList(a)
+	l.SetSize(40, 10)
+
+	_ = l.Render()
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+
+	a.Bump()
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "version bump must invalidate frozen entry")
+
+	// Re-renders without bumping go back to cache hits.
+	_ = l.Render()
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "post-bump render re-freezes")
+}
+
+// TestList_FrozenItem_ResizeUnfreezes covers §4.5.1: resize
+// invalidates frozen entries.
+func TestList_FrozenItem_ResizeUnfreezes(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", true)
+	l := NewList(a)
+	l.SetSize(40, 10)
+
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+
+	l.SetSize(80, 10)
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "width change must invalidate frozen entry")
+}
+
+// TestList_FrozenItem_SelectionDragUnfreeze covers §4.5.1: an active
+// selection-drag span must un-freeze items inside the range; ending
+// the drag re-freezes them.
+func TestList_FrozenItem_SelectionDragUnfreeze(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", true)
+	b := newTrackedItem("b", "bravo", true)
+	c := newTrackedItem("c", "charlie", true)
+
+	l := NewList(a, b, c)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	// Begin a selection drag spanning items 0..1. Items inside the
+	// range must re-render (they re-render exactly once because
+	// the un-freeze drops the cached entry, and the selection
+	// suppression keeps them un-frozen until the drag ends).
+	l.BeginSelectionDrag(0, 1)
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "drag-spanned item must re-render once on entering the drag")
+	require.Equal(t, 2, b.renderHits, "drag-spanned item must re-render once on entering the drag")
+	require.Equal(t, 1, c.renderHits, "out-of-range item must remain frozen")
+
+	// While the drag is active, items inside the range are NOT
+	// frozen. Subsequent renders without state changes still
+	// trigger re-renders (because version+width hit but frozen=false
+	// also matches; we still re-use the cache — no, actually with
+	// our implementation we DO cache unfrozen entries by version).
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "unfrozen but version-stable hits the cache")
+	require.Equal(t, 2, b.renderHits, "unfrozen but version-stable hits the cache")
+
+	// End the drag. Items inside the range re-render once and
+	// re-freeze.
+	l.EndSelectionDrag()
+	_ = l.Render()
+	require.Equal(t, 3, a.renderHits, "post-drag render re-freezes the entry")
+	require.Equal(t, 3, b.renderHits, "post-drag render re-freezes the entry")
+
+	// Subsequent renders are cache hits again.
+	for range 3 {
+		_ = l.Render()
+	}
+	require.Equal(t, 3, a.renderHits, "frozen after drag end")
+	require.Equal(t, 3, b.renderHits, "frozen after drag end")
+}
+
+// TestList_RenderOutputStableAcrossDraws is the F6 byte-equality
+// invariant: rendering the same list multiple times must produce the
+// same bytes.
+func TestList_RenderOutputStableAcrossDraws(t *testing.T) {
+	t.Parallel()
+
+	items := make([]Item, 0, 5)
+	for i := range 5 {
+		items = append(items, newTrackedItem(strconv.Itoa(i), "item-"+strconv.Itoa(i), i%2 == 0))
+	}
+	l := NewList(items...)
+	l.SetSize(40, 20)
+
+	first := l.Render()
+	for range 4 {
+		require.Equal(t, first, l.Render(), "render output must be byte-stable across draws")
+	}
+	// And the output is non-trivial.
+	require.True(t, strings.Contains(first, "item-0"))
+}
+
+// TestList_SetItems_PointerOverlapRetainsCache covers F6 §4.5
+// invalidation semantics for SetItems. When the new slice shares
+// some pointers with the previous slice (a typical "swap a few
+// items, keep the rest" scenario), the cache entries for the
+// surviving items must be retained — re-rendering them would defeat
+// the memo. Entries for the items that were removed must be
+// dropped so they can't serve stale output if the same pointer is
+// re-introduced later.
+func TestList_SetItems_PointerOverlapRetainsCache(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false)
+	b := newTrackedItem("b", "bravo", false)
+	c := newTrackedItem("c", "charlie", false)
+	d := newTrackedItem("d", "delta", false)
+
+	l := NewList(a, b, c)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	// Replace the slice with one that shares a and c (b is dropped,
+	// d is added). a and c must keep their cache entries; d renders
+	// once on the next draw.
+	l.SetItems(a, c, d)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits, "stable item must keep cached entry across SetItems")
+	require.Equal(t, 1, c.renderHits, "stable item must keep cached entry across SetItems")
+	require.Equal(t, 1, d.renderHits, "new item renders once")
+
+	// Re-introducing b after it was dropped must rebuild its
+	// entry (its previous cache entry was invalidated by SetItems).
+	l.SetItems(a, b, c)
+	_ = l.Render()
+	require.Equal(t, 2, b.renderHits, "re-introduced item must re-render — its old entry was dropped")
+	// a and c remained throughout both swaps.
+	require.Equal(t, 1, a.renderHits, "stable item retained across multiple SetItems")
+	require.Equal(t, 1, c.renderHits, "stable item retained across multiple SetItems")
+}
+
+// TestList_SetItems_AllNewDropsEveryEntry covers F6 §4.5: when the
+// SetItems slice has no pointer overlap with the previous slice,
+// every cache entry from the previous slice is dropped. This is
+// the pure-replace case (e.g. session switch).
+func TestList_SetItems_AllNewDropsEveryEntry(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false)
+	b := newTrackedItem("b", "bravo", false)
+	c := newTrackedItem("c", "charlie", false)
+
+	l := NewList(a, b, c)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	// Replace with a fully disjoint slice. Every entry from the
+	// previous slice must be dropped.
+	x := newTrackedItem("x", "xray", false)
+	y := newTrackedItem("y", "yankee", false)
+	l.SetItems(x, y)
+	_ = l.Render()
+	require.Equal(t, 1, x.renderHits, "new item renders once")
+	require.Equal(t, 1, y.renderHits, "new item renders once")
+
+	// Re-introducing the originals must rebuild every entry.
+	l.SetItems(a, b, c)
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "previously-dropped item must re-render")
+	require.Equal(t, 2, b.renderHits, "previously-dropped item must re-render")
+	require.Equal(t, 2, c.renderHits, "previously-dropped item must re-render")
+}
+
+// TestVersioned_BumpMonotonic covers the basic Versioned contract:
+// Version() starts at zero and Bump() advances it monotonically.
+func TestVersioned_BumpMonotonic(t *testing.T) {
+	t.Parallel()
+
+	v := NewVersioned()
+	require.Equal(t, uint64(0), v.Version())
+	v.Bump()
+	require.Equal(t, uint64(1), v.Version())
+	v.Bump()
+	v.Bump()
+	require.Equal(t, uint64(3), v.Version())
+}
diff --git a/internal/ui/model/layout_test.go b/internal/ui/model/layout_test.go
index f7db5fa07d748fab0f963854d7f54138e9ffa0cc..ea4c33f1c3ce459054b01e66a1e32f4d8cc031de 100644
--- a/internal/ui/model/layout_test.go
+++ b/internal/ui/model/layout_test.go
@@ -20,6 +20,8 @@ type testMessageItem struct {
 func (m testMessageItem) ID() string           { return m.id }
 func (m testMessageItem) Render(int) string    { return m.text }
 func (m testMessageItem) RawRender(int) string { return m.text }
+func (m testMessageItem) Version() uint64      { return 0 }
+func (m testMessageItem) Finished() bool       { return true }
 
 var _ chat.MessageItem = testMessageItem{}
 

From 77b6c38b0fb150d97dbce466f737d1217e5a7604 Mon Sep 17 00:00:00 2001
From: Christian Rocha <christian@rocha.is>
Date: Tue, 12 May 2026 14:41:31 -0400
Subject: [PATCH 06/10] perf(chat): only render the chat lines that fit on
 screen

Previously the chat list pulled the entire rendered output of every
on-screen item into a buffer and then trimmed it down to the viewport
height at the end. For very tall items, like a long reasoning block,
that meant building a buffer with thousands of lines just to throw
most of them away every frame. The list now stops collecting lines
as soon as the viewport is full, so per-frame work is bounded by the
visible window. Output is unchanged.

Co-Authored-By: Charm Crush <crush@charm.land>
---
 internal/ui/list/list.go      |  52 +++--
 internal/ui/list/list_test.go | 369 ++++++++++++++++++++++++++++++++++
 2 files changed, 404 insertions(+), 17 deletions(-)

diff --git a/internal/ui/list/list.go b/internal/ui/list/list.go
index a4e30bac2dcd02013910551b68a71740c8987d21..d474a3307ba6bff830f4702f960f31e3335a83b0 100644
--- a/internal/ui/list/list.go
+++ b/internal/ui/list/list.go
@@ -457,18 +457,30 @@ func (l *List) VisibleItemIndices() (startIdx, endIdx int) {
 }
 
 // Render renders the list and returns the visible lines.
+//
+// F7: per-item slicing is bounded by the remaining viewport budget so
+// per-frame work is O(viewport) rather than O(total item heights).
+// We never append beyond l.height lines to the output buffer; the
+// final trim is therefore unnecessary. Reverse mode applies the same
+// final reversal as before, which is byte-identical because the
+// pre-F7 trim happened at the tail of the joined buffer (the same
+// lines we now drop implicitly per item).
 func (l *List) Render() string {
 	if len(l.items) == 0 {
 		return ""
 	}
 
-	var lines []string
+	budget := max(l.height, 0)
+	lines := make([]string, 0, budget)
 	currentIdx := l.offsetIdx
 	currentOffset := l.offsetLine
 
-	linesNeeded := l.height
+	for currentIdx < len(l.items) {
+		remaining := budget - len(lines)
+		if remaining <= 0 {
+			break
+		}
 
-	for linesNeeded > 0 && currentIdx < len(l.items) {
 		entry := l.renderItemEntry(currentIdx)
 		if entry == nil {
 			break
@@ -477,37 +489,43 @@ func (l *List) Render() string {
 		itemHeight := len(itemLines)
 
 		if currentOffset >= 0 && currentOffset < itemHeight {
-			// Add visible content lines
-			lines = append(lines, itemLines[currentOffset:]...)
+			// Append only the visible slice that fits in the
+			// remaining viewport budget. Anything past the
+			// budget would be discarded by the pre-F7 tail
+			// trim, so skipping the append here is
+			// byte-identical and bounded.
+			visible := itemLines[currentOffset:]
+			if len(visible) > remaining {
+				visible = visible[:remaining]
+			}
+			lines = append(lines, visible...)
 
-			// Add gap if this is not the absolute last visual element (conceptually gaps are between items)
-			// But in the loop we can just add it and trim later
+			// Gap rows after the item, capped to the
+			// remaining budget so a 30k-line item with a
+			// trailing gap can't push past the viewport.
 			if l.gap > 0 {
-				for i := 0; i < l.gap; i++ {
+				gapBudget := min(budget-len(lines), l.gap)
+				for range gapBudget {
 					lines = append(lines, "")
 				}
 			}
 		} else {
-			// offsetLine starts in the gap
+			// offsetLine starts inside the gap.
 			gapOffset := currentOffset - itemHeight
 			gapRemaining := l.gap - gapOffset
 			if gapRemaining > 0 {
-				for range gapRemaining {
+				gapBudget := min(budget-len(lines), gapRemaining)
+				for range gapBudget {
 					lines = append(lines, "")
 				}
 			}
 		}
 
-		linesNeeded = l.height - len(lines)
 		currentIdx++
-		currentOffset = 0 // Reset offset for subsequent items
+		currentOffset = 0 // Reset offset for subsequent items.
 	}
 
-	l.height = max(l.height, 0)
-
-	if len(lines) > l.height {
-		lines = lines[:l.height]
-	}
+	l.height = budget
 
 	if l.reverse {
 		// Reverse the lines so the list renders bottom-to-top.
diff --git a/internal/ui/list/list_test.go b/internal/ui/list/list_test.go
index 5f672b3fcd00cb7175c54e4e8415e1b4224f3a11..d29f9e75cddf3f880566161d24b1d711c44e487f 100644
--- a/internal/ui/list/list_test.go
+++ b/internal/ui/list/list_test.go
@@ -399,3 +399,372 @@ func TestVersioned_BumpMonotonic(t *testing.T) {
 	v.Bump()
 	require.Equal(t, uint64(3), v.Version())
 }
+
+// multiLineItem is a test helper whose Render returns a fixed
+// multi-line body. Each line is uniquely identifiable (id:N) so a
+// test can reconstruct the expected visible window by index. F7's
+// byte-identity matrix is built around these.
+type multiLineItem struct {
+	*Versioned
+	id     string
+	height int
+}
+
+func newMultiLineItem(id string, height int) *multiLineItem {
+	return &multiLineItem{
+		Versioned: NewVersioned(),
+		id:        id,
+		height:    height,
+	}
+}
+
+func (m *multiLineItem) Render(_ int) string {
+	if m.height <= 0 {
+		return ""
+	}
+	parts := make([]string, m.height)
+	for i := range m.height {
+		parts[i] = m.id + ":" + strconv.Itoa(i)
+	}
+	return strings.Join(parts, "\n")
+}
+
+func (m *multiLineItem) Finished() bool { return true }
+
+// expectedRender computes what list.Render *should* produce from
+// first principles given the item heights, viewport, offsetIdx,
+// offsetLine, gap, and reverse settings. It mirrors the pre-F7
+// "build everything, trim to height, reverse" semantics so we can
+// assert byte-identity against the new bounded path.
+func expectedRender(items []*multiLineItem, height, offsetIdx, offsetLine, gap int, reverse bool) string {
+	if len(items) == 0 {
+		return ""
+	}
+	budget := max(height, 0)
+	var lines []string
+	currentOffset := offsetLine
+	for idx := offsetIdx; idx < len(items) && len(lines) < budget; idx++ {
+		body := items[idx].Render(0)
+		body = strings.TrimRight(body, "\n")
+		itemLines := strings.Split(body, "\n")
+		itemHeight := len(itemLines)
+
+		if currentOffset >= 0 && currentOffset < itemHeight {
+			lines = append(lines, itemLines[currentOffset:]...)
+			for range gap {
+				lines = append(lines, "")
+			}
+		} else {
+			gapOffset := currentOffset - itemHeight
+			gapRemaining := gap - gapOffset
+			for range max(gapRemaining, 0) {
+				lines = append(lines, "")
+			}
+		}
+		currentOffset = 0
+	}
+	if len(lines) > budget {
+		lines = lines[:budget]
+	}
+	if reverse {
+		for i, j := 0, len(lines)-1; i < j; i, j = i+1, j-1 {
+			lines[i], lines[j] = lines[j], lines[i]
+		}
+	}
+	return strings.Join(lines, "\n")
+}
+
+// TestList_F7_ByteIdentityMatrix is T1 from the F7 plan: a sweep
+// over (item heights × viewport heights × offsets × gaps × reverse)
+// that asserts list.Render produces output byte-identical to a
+// pre-F7-equivalent reference (build full buffer, trim at end).
+func TestList_F7_ByteIdentityMatrix(t *testing.T) {
+	t.Parallel()
+
+	itemHeights := [][]int{
+		{1},
+		{5},
+		{1, 1, 1},
+		{3, 7, 2},
+		{20, 5, 30},
+		{50, 1, 50, 1},
+	}
+	viewportHeights := []int{1, 3, 5, 10, 25, 100}
+	offsetIdxs := []int{0, 1, 2}
+	offsetLines := []int{0, 1, 4}
+	gaps := []int{0, 1, 3}
+	reverses := []bool{false, true}
+
+	for _, heights := range itemHeights {
+		for _, vh := range viewportHeights {
+			for _, oIdx := range offsetIdxs {
+				if oIdx >= len(heights) {
+					continue
+				}
+				for _, oLine := range offsetLines {
+					maxOffset := heights[oIdx]
+					if oLine >= maxOffset {
+						continue
+					}
+					for _, gap := range gaps {
+						for _, reverse := range reverses {
+							items := make([]*multiLineItem, len(heights))
+							asItems := make([]Item, len(heights))
+							for i, h := range heights {
+								items[i] = newMultiLineItem("i"+strconv.Itoa(i), h)
+								asItems[i] = items[i]
+							}
+							l := NewList(asItems...)
+							l.SetSize(40, vh)
+							l.SetGap(gap)
+							l.SetReverse(reverse)
+							l.offsetIdx = oIdx
+							l.offsetLine = oLine
+
+							got := l.Render()
+							want := expectedRender(items, vh, oIdx, oLine, gap, reverse)
+							require.Equalf(t, want, got,
+								"mismatch heights=%v vh=%d oIdx=%d oLine=%d gap=%d reverse=%v",
+								heights, vh, oIdx, oLine, gap, reverse)
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+// TestList_F7_GiantItemBoundedRender is T2: a single 10,000-line
+// item with a 50-line viewport. Render must return exactly 50
+// lines — no off-by-one, no trim issue. This is the F7 win in
+// test form: per-frame work is bounded by viewport, not item
+// height.
+func TestList_F7_GiantItemBoundedRender(t *testing.T) {
+	t.Parallel()
+
+	const itemHeight = 10000
+	const viewport = 50
+
+	giant := newMultiLineItem("giant", itemHeight)
+	l := NewList(giant)
+	l.SetSize(40, viewport)
+
+	out := l.Render()
+	got := strings.Count(out, "\n") + 1
+	require.Equal(t, viewport, got, "render output must be exactly viewport lines for an oversized item")
+
+	// And the lines are the prefix of the item starting at
+	// offset 0.
+	lines := strings.Split(out, "\n")
+	for i, line := range lines {
+		require.Equal(t, "giant:"+strconv.Itoa(i), line, "line %d does not match expected slice", i)
+	}
+}
+
+// TestList_F7_GiantItemWithOffsetBoundedRender complements T2 with
+// a non-zero offsetLine so we exercise both the "skip prefix" and
+// "bound suffix" sides of the slice.
+func TestList_F7_GiantItemWithOffsetBoundedRender(t *testing.T) {
+	t.Parallel()
+
+	const itemHeight = 10000
+	const viewport = 50
+	const offset = 1234
+
+	giant := newMultiLineItem("giant", itemHeight)
+	l := NewList(giant)
+	l.SetSize(40, viewport)
+	l.offsetLine = offset
+
+	out := l.Render()
+	lines := strings.Split(out, "\n")
+	require.Len(t, lines, viewport, "render output must be exactly viewport lines for an oversized item")
+	for i, line := range lines {
+		require.Equal(t, "giant:"+strconv.Itoa(offset+i), line, "line %d does not match expected slice", i)
+	}
+}
+
+// TestList_F7_GapOverflow is T3: viewport height 5, two items each
+// 10 lines, gap of 3. Render returns exactly 5 lines and never
+// includes gap rows beyond the viewport.
+func TestList_F7_GapOverflow(t *testing.T) {
+	t.Parallel()
+
+	a := newMultiLineItem("a", 10)
+	b := newMultiLineItem("b", 10)
+	l := NewList(a, b)
+	l.SetSize(40, 5)
+	l.SetGap(3)
+
+	out := l.Render()
+	lines := strings.Split(out, "\n")
+	require.Len(t, lines, 5, "viewport must clamp output to height even with gap rows pending")
+
+	// Gap rows after item a would only appear if the viewport
+	// extended past the first 10 lines, which it doesn't here.
+	for i, line := range lines {
+		require.Equal(t, "a:"+strconv.Itoa(i), line, "line %d", i)
+	}
+}
+
+// TestList_F7_GapOverflow_BoundaryStraddle exercises a viewport
+// that lands inside the gap region between two items: 12 lines
+// viewport, item a height 10, item b height 10, gap 3 — first 10
+// lines from a, then 2 of the 3 gap rows, no b lines yet.
+func TestList_F7_GapOverflow_BoundaryStraddle(t *testing.T) {
+	t.Parallel()
+
+	a := newMultiLineItem("a", 10)
+	b := newMultiLineItem("b", 10)
+	l := NewList(a, b)
+	l.SetSize(40, 12)
+	l.SetGap(3)
+
+	out := l.Render()
+	lines := strings.Split(out, "\n")
+	require.Len(t, lines, 12)
+
+	for i := range 10 {
+		require.Equal(t, "a:"+strconv.Itoa(i), lines[i])
+	}
+	require.Equal(t, "", lines[10])
+	require.Equal(t, "", lines[11])
+}
+
+// TestList_F7_ReverseGiantItem is T4: same bounded-slicing
+// invariant in reverse mode. Reverse mode keeps the same final
+// trim semantics; bounded slicing must produce the same window
+// (just reversed).
+func TestList_F7_ReverseGiantItem(t *testing.T) {
+	t.Parallel()
+
+	const itemHeight = 10000
+	const viewport = 50
+
+	giant := newMultiLineItem("giant", itemHeight)
+	l := NewList(giant)
+	l.SetSize(40, viewport)
+	l.SetReverse(true)
+
+	out := l.Render()
+	lines := strings.Split(out, "\n")
+	require.Len(t, lines, viewport)
+
+	// Expected: the same first-50 slice as the non-reverse path
+	// but reversed.
+	for i, line := range lines {
+		expectedIdx := viewport - 1 - i
+		require.Equal(t, "giant:"+strconv.Itoa(expectedIdx), line, "reverse line %d", i)
+	}
+}
+
+// TestList_F7_OffsetLineAtItemBoundary is T5: offsetLine ==
+// itemHeight lands exactly past the last visible line of the item
+// at offsetIdx. The renderer must not address line N (which does
+// not exist); the visible window starts at the gap rows (when gap
+// > 0) or at the next item (when gap == 0).
+func TestList_F7_OffsetLineAtItemBoundary(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name     string
+		gap      int
+		viewport int
+		want     []string
+	}{
+		{
+			name:     "gap zero jumps straight to next item",
+			gap:      0,
+			viewport: 5,
+			want: []string{
+				"b:0", "b:1", "b:2", "b:3", "b:4",
+			},
+		},
+		{
+			name:     "gap two emits gap rows then next item",
+			gap:      2,
+			viewport: 5,
+			want: []string{
+				"", "", "b:0", "b:1", "b:2",
+			},
+		},
+	}
+
+	const itemHeight = 4
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			a := newMultiLineItem("a", itemHeight)
+			b := newMultiLineItem("b", 10)
+			l := NewList(a, b)
+			l.SetSize(40, tc.viewport)
+			l.SetGap(tc.gap)
+			l.offsetIdx = 0
+			l.offsetLine = itemHeight // exactly at the boundary
+
+			out := l.Render()
+			require.Equal(t, strings.Join(tc.want, "\n"), out)
+		})
+	}
+}
+
+// TestList_F7_OffsetLineInsideGap is T6: offsetLine is one row
+// past the end of the item at offsetIdx, landing inside the gap
+// region. The visible window starts at the second gap row and
+// then continues into the next item.
+func TestList_F7_OffsetLineInsideGap(t *testing.T) {
+	t.Parallel()
+
+	const itemHeight = 4
+	const gap = 3
+	const viewport = 5
+
+	a := newMultiLineItem("a", itemHeight)
+	b := newMultiLineItem("b", 10)
+	l := NewList(a, b)
+	l.SetSize(40, viewport)
+	l.SetGap(gap)
+	l.offsetIdx = 0
+	l.offsetLine = itemHeight + 1 // one row into the gap
+
+	out := l.Render()
+	want := strings.Join([]string{
+		"",    // second gap row
+		"",    // third gap row
+		"b:0", // next item starts
+		"b:1",
+		"b:2",
+	}, "\n")
+	require.Equal(t, want, out)
+}
+
+// TestList_F7_ViewportZeroOrNegative is T7: a non-positive
+// viewport height must produce an empty string with no panic and
+// must normalize l.height to zero (the budget := max(l.height, 0)
+// side effect).
+func TestList_F7_ViewportZeroOrNegative(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name   string
+		height int
+	}{
+		{name: "height zero", height: 0},
+		{name: "height negative", height: -1},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			a := newMultiLineItem("a", 5)
+			l := NewList(a)
+			l.SetSize(40, tc.height)
+
+			out := l.Render()
+			require.Equal(t, "", out, "render must be empty for non-positive viewport")
+			require.Equal(t, 0, l.height, "render must normalize height to zero")
+		})
+	}
+}

From 1c7ccfd7f78853dd86598eeef103f4ac0c29e8b2 Mon Sep 17 00:00:00 2001
From: Christian Rocha <christian@rocha.is>
Date: Tue, 12 May 2026 15:47:39 -0400
Subject: [PATCH 07/10] perf(chat): reuse the rendered prefix of a streaming
 reply

While a model is streaming a reply, we used to re-render the entire
message text every time a new chunk arrived. Now we keep the rendered
output of the parts that are clearly finished and only render the
trailing piece on each update. The check for what counts as a finished
part is conservative on purpose, so anything risky (open code blocks,
lists, tables, html blocks, link references) still falls back to a
full render. The visible output is the same.

Co-Authored-By: Charm Crush <crush@charm.land>
---
 internal/ui/chat/assistant.go                |  30 +-
 internal/ui/chat/incremental_glamour_test.go | 853 +++++++++++++++++++
 internal/ui/chat/streaming_markdown.go       | 740 ++++++++++++++++
 internal/ui/chat/tools.go                    |   3 +
 internal/ui/chat/user.go                     |   3 +
 internal/ui/common/markdown.go               |  74 +-
 6 files changed, 1693 insertions(+), 10 deletions(-)
 create mode 100644 internal/ui/chat/incremental_glamour_test.go
 create mode 100644 internal/ui/chat/streaming_markdown.go

diff --git a/internal/ui/chat/assistant.go b/internal/ui/chat/assistant.go
index e9f00020294abe2d707b645a111edf6181d424f4..412d85238e53ba33b8b21f7917b7e2298a424d20 100644
--- a/internal/ui/chat/assistant.go
+++ b/internal/ui/chat/assistant.go
@@ -137,6 +137,13 @@ type AssistantMessageItem struct {
 	thinkingSec assistantSection
 	contentSec  assistantSection
 	errorSec    assistantSection
+
+	// streamingContent caches a "stable prefix" glamour render of
+	// the assistant content body so each streaming flush only
+	// re-renders the trailing partial. F8 of
+	// docs/notes/2026-05-12-chat-rendering-perf.md. See
+	// streaming_markdown.go for the full algorithm.
+	streamingContent streamingMarkdown
 }
 
 var _ Expandable = (*AssistantMessageItem)(nil)
@@ -440,7 +447,10 @@ func (a *AssistantMessageItem) cachedError(width int) string {
 // lines so the visual box matches what the user sees today.
 func (a *AssistantMessageItem) renderThinking(thinking string, width int) string {
 	renderer := common.QuietMarkdownRenderer(a.sty, width)
+	mu := common.LockMarkdownRenderer(renderer)
+	mu.Lock()
 	rendered, err := renderer.Render(thinking)
+	mu.Unlock()
 	if err != nil {
 		rendered = thinking
 	}
@@ -489,14 +499,18 @@ func (a *AssistantMessageItem) renderThinking(thinking string, width int) string
 	return result
 }
 
-// renderMarkdown renders content as markdown.
+// renderMarkdown renders content as markdown. F8 routes the call
+// through streamingContent, which caches the glamour render of a
+// "stable prefix" so each streaming flush only re-renders the
+// trailing partial. The streaming cache invalidates itself on
+// width change and on any content that is not a prefix-extension
+// of the previously rendered content (e.g. user retried the
+// turn), and falls back to a full render whenever boundary
+// detection has the slightest doubt — see
+// findSafeMarkdownBoundary.
 func (a *AssistantMessageItem) renderMarkdown(content string, width int) string {
 	renderer := common.MarkdownRenderer(a.sty, width)
-	result, err := renderer.Render(content)
-	if err != nil {
-		return content
-	}
-	return strings.TrimSuffix(result, "\n")
+	return a.streamingContent.Render(content, width, renderer)
 }
 
 func (a *AssistantMessageItem) renderSpinning() string {
@@ -564,11 +578,15 @@ func (a *AssistantMessageItem) Finished() bool {
 // clearCache drops every cached render for this item, including the
 // per-section caches. Shadows the embedded cachedMessageItem.clearCache
 // so ClearItemCaches (style change) wipes the section caches too.
+// F8: also drop the streaming-markdown stable-prefix cache because
+// the cached glamour render embeds the OLD style's ANSI sequences
+// and is no longer visually consistent with the new style.
 func (a *AssistantMessageItem) clearCache() {
 	a.cachedMessageItem.clearCache()
 	a.thinkingSec.reset()
 	a.contentSec.reset()
 	a.errorSec.reset()
+	a.streamingContent.Reset()
 }
 
 // ToggleExpanded advances the F5 thinking view-mode cycle and returns
diff --git a/internal/ui/chat/incremental_glamour_test.go b/internal/ui/chat/incremental_glamour_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..7f6e7ac7380bcfee53dd35edb84387f2cd74973b
--- /dev/null
+++ b/internal/ui/chat/incremental_glamour_test.go
@@ -0,0 +1,853 @@
+package chat
+
+import (
+	"strings"
+	"testing"
+
+	"charm.land/glamour/v2"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// newTestRenderer builds a fresh glamour renderer for the given
+// width. We deliberately do NOT share renderers between calls in
+// the equivalence tests so any hidden state in
+// [glamour.TermRenderer] cannot leak from a "cached" rendering
+// path into a "fresh" rendering path.
+func newTestRenderer(t *testing.T, width int) *glamour.TermRenderer {
+	t.Helper()
+	sty := styles.CharmtonePantera()
+	r, err := glamour.NewTermRenderer(
+		glamour.WithStyles(sty.Markdown),
+		glamour.WithWordWrap(width),
+	)
+	require.NoError(t, err)
+	return r
+}
+
+// freshRender renders content as a single document with a fresh
+// glamour renderer and applies the same trailing-newline trim
+// that streamingMarkdown.Render does. Use this for byte- and
+// visible-equivalence comparisons against the streaming path.
+func freshRender(t *testing.T, content string, width int) string {
+	t.Helper()
+	r := newTestRenderer(t, width)
+	out, err := r.Render(content)
+	require.NoError(t, err)
+	return strings.TrimSuffix(out, "\n")
+}
+
+// stripANSI removes all ANSI CSI escape sequences from s so two
+// renders with different colour state can be compared on their
+// visible glyphs alone.
+func stripANSI(s string) string {
+	var b strings.Builder
+	b.Grow(len(s))
+	i := 0
+	for i < len(s) {
+		if s[i] == 0x1b && i+1 < len(s) && s[i+1] == '[' {
+			j := i + 2
+			for j < len(s) {
+				c := s[j]
+				if c >= 0x40 && c <= 0x7e {
+					j++
+					break
+				}
+				j++
+			}
+			i = j
+			continue
+		}
+		b.WriteByte(s[i])
+		i++
+	}
+	return b.String()
+}
+
+// normalizeRender canonicalises a rendered glamour string for
+// visual-equivalence comparison: strip ANSI, drop per-line
+// trailing whitespace, drop leading/trailing blank lines, and
+// collapse consecutive blank lines to a single blank line.
+//
+// Glamour pads rendered lines with trailing spaces and adds top/
+// bottom block margins that differ subtly between "render the
+// whole document at once" and "render two halves and concatenate
+// them." Per F8 design principle D, those byte-level differences
+// are acceptable as long as the visible content matches; this
+// helper makes that comparison explicit.
+func normalizeRender(s string) string {
+	clean := stripANSI(s)
+	lines := strings.Split(clean, "\n")
+	for i, l := range lines {
+		lines[i] = strings.TrimRight(l, " \t")
+	}
+	// Collapse consecutive blank lines.
+	out := make([]string, 0, len(lines))
+	prevBlank := false
+	for _, l := range lines {
+		blank := l == ""
+		if blank && prevBlank {
+			continue
+		}
+		out = append(out, l)
+		prevBlank = blank
+	}
+	// Trim leading and trailing blanks.
+	for len(out) > 0 && out[0] == "" {
+		out = out[1:]
+	}
+	for len(out) > 0 && out[len(out)-1] == "" {
+		out = out[:len(out)-1]
+	}
+	return strings.Join(out, "\n")
+}
+
+// containsRawMarkdownSource reports whether the visible portion of
+// rendered contains literal markdown source markers that should
+// have been consumed by glamour. Used by T2 to assert that
+// intermediate streaming flushes don't leak raw source through to
+// the user. We deliberately only flag markers that glamour
+// removes during rendering ("```" fence delimiters, "|" table
+// pipes embedded in a line that also contains pipes — actual
+// table syntax — and bare "###" headers); pipes-in-prose and
+// dashes are too common to flag.
+func containsRawMarkdownSource(rendered string) bool {
+	clean := stripANSI(rendered)
+	if strings.Contains(clean, "```") {
+		return true
+	}
+	for _, line := range strings.Split(clean, "\n") {
+		if strings.HasPrefix(strings.TrimLeft(line, " \t"), "###") {
+			return true
+		}
+	}
+	return false
+}
+
+// -----------------------------------------------------------------------
+// T1: findSafeMarkdownBoundary unit tests.
+// -----------------------------------------------------------------------
+
+// TestFindSafeMarkdownBoundary_TableDriven exercises the
+// findSafeMarkdownBoundary decision tree across the full set of
+// constructs §4.4 calls out: plain paragraphs, fenced code (open
+// and closed), lists, tables, block quotes, and setext headers.
+func TestFindSafeMarkdownBoundary_TableDriven(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name    string
+		content string
+		// want is the expected boundary; -1 means "no safe
+		// boundary." When >=0 the test asserts content[:want]
+		// ends after a blank-line separator and content[:want]
+		// is a complete prefix.
+		want int
+	}{
+		{
+			name:    "empty",
+			content: "",
+			want:    -1,
+		},
+		{
+			name:    "single line",
+			content: "Just a single paragraph",
+			want:    -1,
+		},
+		{
+			name:    "two paragraphs",
+			content: "First paragraph.\n\nSecond paragraph.",
+			// boundary at start of "Second"
+			want: len("First paragraph.\n\n"),
+		},
+		{
+			name:    "three paragraphs picks latest",
+			content: "First.\n\nSecond.\n\nThird.",
+			want:    len("First.\n\nSecond.\n\n"),
+		},
+		{
+			name:    "open fence at end",
+			content: "Para.\n\n```go\nfoo()\n",
+			// no closing fence — every blank-line candidate
+			// before content end is INSIDE the fence (the open
+			// fence opened at offset 7). Actually the ONLY
+			// blank line is between "Para." and "```go", so
+			// candidate boundary is right before "```go". At
+			// that point fence count = 0, even, but the line
+			// AFTER (the first non-blank) is "```go" which
+			// would change rendering of the prefix… hmm,
+			// actually it wouldn't change the prefix's
+			// rendering because the prefix is just "Para.\n\n".
+			// The boundary would be ACCEPTED. Let's check
+			// what our impl does.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "inside open fence: no candidate after open",
+			content: "Para.\n\n```go\nfoo()\n\nbar()\n",
+			// blank line after "foo()" is INSIDE the fence
+			// (fence count at that prefix = 1, odd), must
+			// reject. The earlier blank line between "Para."
+			// and "```go" should still be safe (fence count
+			// at that prefix = 0).
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "closed fence followed by paragraph",
+			content: "Para1.\n\n```\nfoo()\n```\n\nPara2.",
+			// latest blank line is between "```" and "Para2.";
+			// fence count at that prefix = 2 (even), last
+			// non-blank line is "```" which is not a list/
+			// table/quote/setext.
+			want: len("Para1.\n\n```\nfoo()\n```\n\n"),
+		},
+		{
+			name:    "open list at end",
+			content: "Para.\n\n- one\n- two\n",
+			// last non-blank line of any blank-bounded prefix
+			// is a list item; our boundary check rejects.
+			// The blank line between "Para." and "- one" is
+			// the only candidate, but the line AFTER (first
+			// non-blank of suffix) is "- one" — that's fine,
+			// a list opening doesn't change the prefix's
+			// rendering. So the boundary BEFORE the list is
+			// accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "list interior: no boundary",
+			content: "- one\n- two\n",
+			// no blank line at all.
+			want: -1,
+		},
+		{
+			name:    "closed list then paragraph",
+			content: "- one\n- two\n\nPara.",
+			// blank line after the list. Last non-blank line
+			// of prefix is "- two" — a list item — so the
+			// candidate is REJECTED. (Conservative: we don't
+			// know the list is "closed" without looking at
+			// what follows.)
+			want: -1,
+		},
+		{
+			name:    "table at end",
+			content: "Para.\n\n| a | b |\n| --- | --- |\n| 1 | 2 |\n",
+			// blank-line candidate is between "Para." and
+			// table opener. Last non-blank line of prefix is
+			// "Para." — fine. Line AFTER is "| a | b |"
+			// which is a table line; doesn't retroactively
+			// change "Para." Boundary accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "table interior with internal blank line: no late boundary",
+			content: "| a | b |\n| --- | --- |\n\n| 1 | 2 |\n",
+			// the blank line in the middle is followed by
+			// another table line. Last non-blank line of
+			// prefix is "| --- | --- |" which contains a
+			// pipe — we reject.
+			want: -1,
+		},
+		{
+			name:    "block quote at end",
+			content: "Para.\n\n> quoted\n> still quoted\n",
+			// Last non-blank line of any prefix that ends
+			// inside the quote block is a "> ..." line —
+			// rejected. The blank line BEFORE the quote
+			// gives a prefix of "Para.\n\n" — last non-blank
+			// "Para." — accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "setext underline pending",
+			content: "Heading\n\n=====\n",
+			// blank line between "Heading" and "=====".
+			// Prefix = "Heading\n\n", last non-blank "Heading"
+			// — fine. But the FIRST non-blank line of the
+			// suffix is "=====", a setext-underline
+			// candidate. Splitting here would render the
+			// prefix as a paragraph "Heading", but the
+			// canonical render would treat the whole thing
+			// as a setext header. Reject.
+			//
+			// (Note: per CommonMark, a blank line between a
+			// paragraph and an underline actually breaks the
+			// setext, so the setext interpretation may not
+			// apply. But the boundary check is conservative
+			// — being wrong costs one slow frame, being
+			// over-aggressive costs visible breakage.)
+			want: -1,
+		},
+		{
+			name:    "indented code at end of prefix",
+			content: "Para.\n\n    code line\n\nNext.",
+			// prefix candidates:
+			//   "Para.\n\n" — last non-blank "Para.", accepted
+			//   "Para.\n\n    code line\n\n" — last non-blank
+			//   is "    code line" which is indented 4
+			//   spaces — REJECTED.
+			// Latest accepted is the first.
+			want: len("Para.\n\n"),
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			t.Parallel()
+			got := findSafeMarkdownBoundary(c.content)
+			require.Equalf(t, c.want, got,
+				"findSafeMarkdownBoundary(%q) = %d, want %d", c.content, got, c.want)
+			if got > 0 {
+				// Boundary must point to the start of a line
+				// (i.e. just after a newline) when the prefix
+				// is non-empty.
+				require.True(t, got <= len(c.content),
+					"boundary %d out of range (len=%d)", got, len(c.content))
+				if got > 0 && got <= len(c.content) {
+					require.Equal(t, byte('\n'), c.content[got-1],
+						"boundary %d does not sit immediately after a newline", got)
+				}
+			}
+		})
+	}
+}
+
+// -----------------------------------------------------------------------
+// T2: streaming-equivalence tests.
+// -----------------------------------------------------------------------
+
+// streamingScenarios returns the four canonical document shapes
+// that exercise different boundary-detection paths.
+func streamingScenarios() []struct {
+	name string
+	doc  string
+} {
+	return []struct {
+		name string
+		doc  string
+	}{
+		{
+			name: "plain-paragraphs",
+			doc: strings.Join([]string{
+				"This is the first paragraph of the document.",
+				"",
+				"Here is the second paragraph; it has some words.",
+				"",
+				"And a third paragraph for good measure.",
+				"",
+				"Finally a fourth paragraph to push past one boundary.",
+			}, "\n"),
+		},
+		{
+			name: "paragraphs-with-fence",
+			doc: strings.Join([]string{
+				"Intro paragraph.",
+				"",
+				"Some explanatory prose before the code.",
+				"",
+				"```go",
+				"func hello() {",
+				"\tfmt.Println(\"hi\")",
+				"}",
+				"```",
+				"",
+				"And a closing paragraph after the code block.",
+			}, "\n"),
+		},
+		{
+			name: "paragraphs-with-list",
+			doc: strings.Join([]string{
+				"Intro paragraph.",
+				"",
+				"- list item one",
+				"- list item two",
+				"- list item three",
+				"",
+				"Trailing paragraph.",
+			}, "\n"),
+		},
+		{
+			name: "paragraphs-with-table",
+			doc: strings.Join([]string{
+				"Intro paragraph.",
+				"",
+				"| col a | col b |",
+				"| ----- | ----- |",
+				"| 1     | 2     |",
+				"| 3     | 4     |",
+				"",
+				"Trailing paragraph after the table.",
+			}, "\n"),
+		},
+	}
+}
+
+// progressivePrefixes splits doc into n monotonically growing
+// byte prefixes, ending with the full document. n>=1.
+func progressivePrefixes(doc string, n int) []string {
+	if n < 1 {
+		n = 1
+	}
+	out := make([]string, 0, n)
+	for i := 1; i <= n; i++ {
+		// integer scaling so the last entry is exactly len(doc)
+		size := len(doc) * i / n
+		if i == n {
+			size = len(doc)
+		}
+		out = append(out, doc[:size])
+	}
+	return out
+}
+
+// TestStreamingMarkdown_FinalVisuallyEquivalent drives a sequence
+// of progressive prefixes through streamingMarkdown and asserts
+// the FINAL output is visually equivalent (per design principle
+// D) to a fresh full-document render. Strict byte-equality is
+// not the bar — see the comment in normalizeRender for why.
+func TestStreamingMarkdown_FinalVisuallyEquivalent(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	const steps = 15
+
+	for _, sc := range streamingScenarios() {
+		t.Run(sc.name, func(t *testing.T) {
+			t.Parallel()
+			renderer := newTestRenderer(t, width)
+			var sm streamingMarkdown
+			prefixes := progressivePrefixes(sc.doc, steps)
+
+			var lastOut string
+			for _, p := range prefixes {
+				lastOut = sm.Render(p, width, renderer)
+			}
+
+			fresh := freshRender(t, sc.doc, width)
+			require.Equal(t, normalizeRender(fresh), normalizeRender(lastOut),
+				"final streaming output must match a fresh full render visually")
+		})
+	}
+}
+
+// TestStreamingMarkdown_IntermediateOutputsPlausible asserts that
+// every intermediate flush returns a non-empty string and does
+// not leak raw markdown source through to the user. This is the
+// "visually plausible" half of T2.
+func TestStreamingMarkdown_IntermediateOutputsPlausible(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	const steps = 12
+
+	for _, sc := range streamingScenarios() {
+		t.Run(sc.name, func(t *testing.T) {
+			t.Parallel()
+			renderer := newTestRenderer(t, width)
+			var sm streamingMarkdown
+
+			for i, p := range progressivePrefixes(sc.doc, steps) {
+				if p == "" {
+					continue
+				}
+				out := sm.Render(p, width, renderer)
+				require.NotEmptyf(t, out, "step %d: empty render for prefix len %d", i, len(p))
+				require.Falsef(t, containsRawMarkdownSource(out),
+					"step %d: render leaked raw markdown source.\nprefix=%q\nout=%s",
+					i, p, normalizeRender(out))
+			}
+		})
+	}
+}
+
+// -----------------------------------------------------------------------
+// T3: cache invalidation tests.
+// -----------------------------------------------------------------------
+
+// TestStreamingMarkdown_WidthChangeInvalidates asserts that a
+// width change blows away the cached prefix so the next render
+// is keyed against the new width. We can't observe the cache
+// directly without reaching into the struct, so we assert the
+// observable contract: after a width change, the rendered output
+// reflects the new width AND the streamingMarkdown's internal
+// cache fields are reset to the new state.
+func TestStreamingMarkdown_WidthChangeInvalidates(t *testing.T) {
+	t.Parallel()
+
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	r80 := newTestRenderer(t, 80)
+	r40 := newTestRenderer(t, 40)
+	var sm streamingMarkdown
+
+	out80 := sm.Render(doc, 80, r80)
+	require.Equal(t, 80, sm.width, "width must be cached after first render")
+	cachedPrefix := sm.stablePrefix
+
+	out40 := sm.Render(doc, 40, r40)
+	require.Equal(t, 40, sm.width, "width change must update cached width")
+	require.NotEqual(t, out80, out40,
+		"different widths must produce different rendered output")
+	// stablePrefix may legitimately have re-advanced after the
+	// reset (tryAdvanceFromEmpty), but if it has, it can no
+	// longer carry the OLD width's render. We assert the cache
+	// reset by checking that the cached prefix length is at
+	// most the current content length.
+	require.True(t, len(sm.stablePrefix) <= len(doc),
+		"stable prefix must be a prefix of the current content")
+	_ = cachedPrefix
+}
+
+// TestStreamingMarkdown_NonPrefixContentInvalidates verifies
+// that content which is NOT a prefix-extension of the cached
+// stable prefix triggers a Reset and a fresh render path. This
+// guards the "user retried the turn" case.
+func TestStreamingMarkdown_NonPrefixContentInvalidates(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	r := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	// Drive a streaming sequence so the cache picks up a stable
+	// prefix.
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	for _, p := range progressivePrefixes(doc, 6) {
+		_ = sm.Render(p, width, r)
+	}
+	require.NotEmpty(t, sm.stablePrefix,
+		"stable prefix must be populated after streaming a multi-paragraph doc")
+
+	// Now switch to entirely different content (user retried).
+	other := "Completely different opening paragraph.\n\nAnd a second."
+	out := sm.Render(other, width, r)
+	require.NotEmpty(t, out)
+	// stablePrefix must be a prefix of `other`, i.e. cache was
+	// reset off the OLD content.
+	require.True(t, strings.HasPrefix(other, sm.stablePrefix),
+		"stable prefix must be reset to a prefix of the new content")
+
+	// Visual equivalence to a fresh render of `other`.
+	fresh := freshRender(t, other, width)
+	require.Equal(t, normalizeRender(fresh), normalizeRender(out),
+		"render after non-prefix content change must match a fresh render")
+}
+
+// TestStreamingMarkdown_ResetClearsCache asserts Reset() drops
+// every cached field; the next render is necessarily a full
+// render path.
+func TestStreamingMarkdown_ResetClearsCache(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	r := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	_ = sm.Render(doc, width, r)
+	// The sample doc has safe boundaries so the cache should
+	// have advanced. If for some reason it didn't, we still
+	// want Reset to be a no-op-safe operation; assert the
+	// post-Reset state directly.
+	sm.Reset()
+	require.Equal(t, 0, sm.width)
+	require.Equal(t, "", sm.stablePrefix)
+	require.Equal(t, "", sm.stablePrefixRender)
+
+	// Next render must be a full render path. Drive one step
+	// and verify the output matches a fresh full render.
+	out := sm.Render(doc, width, r)
+	fresh := freshRender(t, doc, width)
+	require.Equal(t, normalizeRender(fresh), normalizeRender(out))
+}
+
+// -----------------------------------------------------------------------
+// T4: fallback safety.
+// -----------------------------------------------------------------------
+
+// TestStreamingMarkdown_NoSafeBoundaryAlwaysFullRenders covers
+// the "one giant table being built character by character" case.
+// Every flush must fall back to a full render; the cache must
+// not advance into an unsafe state. We compare each flush to a
+// fresh full render of the same prefix; bytes must match for
+// each prefix individually.
+//
+// (Byte equality is sound here because no concatenation happens:
+// the streaming path delegates straight to renderer.Render when
+// the cache is empty and no safe boundary exists.)
+func TestStreamingMarkdown_NoSafeBoundaryAlwaysFullRenders(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+
+	// One growing table — no blank lines anywhere, so no
+	// boundary candidate is ever found.
+	doc := strings.Join([]string{
+		"| col a | col b | col c |",
+		"| ----- | ----- | ----- |",
+		"| 1     | 2     | 3     |",
+		"| 4     | 5     | 6     |",
+		"| 7     | 8     | 9     |",
+		"| 10    | 11    | 12    |",
+		"| 13    | 14    | 15    |",
+		"| 16    | 17    | 18    |",
+		"| 19    | 20    | 21    |",
+		"| 22    | 23    | 24    |",
+	}, "\n")
+	require.Equal(t, -1, findSafeMarkdownBoundary(doc),
+		"sanity check: no blank lines, no safe boundary")
+
+	r := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	prefixes := progressivePrefixes(doc, 10)
+	for i, p := range prefixes {
+		if p == "" {
+			continue
+		}
+		out := sm.Render(p, width, r)
+		fresh := freshRender(t, p, width)
+		require.Equalf(t, fresh, out,
+			"step %d (len=%d): streaming output must byte-equal a fresh render when boundary detection fails",
+			i, len(p))
+	}
+	// Cache must remain empty: no boundary was ever found, no
+	// width change occurred, no advance ever cached anything.
+	require.Equal(t, "", sm.stablePrefix,
+		"stable prefix must remain empty when no safe boundary ever exists")
+}
+
+// TestStreamingMarkdown_NoSafeBoundaryDoesNotCrash is the
+// minimum-viability assertion of T4: even when boundary
+// detection fails on every flush the streaming path must not
+// crash and must produce non-empty output for non-empty input.
+func TestStreamingMarkdown_NoSafeBoundaryDoesNotCrash(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	r := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	// A deeply-pathological input: a single line that grows
+	// one character at a time. There is never a blank-line
+	// separator so the cache is never advanced.
+	src := "The quick brown fox jumps over the lazy dog."
+	for i := 1; i <= len(src); i++ {
+		out := sm.Render(src[:i], width, r)
+		require.NotEmpty(t, out, "streaming output must not be empty for non-empty input")
+	}
+}
+
+// -----------------------------------------------------------------------
+// Integration assertions on the wired-in path.
+// -----------------------------------------------------------------------
+
+// -----------------------------------------------------------------------
+// T5 / T6 / T7: anywhere-in-prefix hazards (B1 / B2 / B3 from the
+// F8 round-2 review). For each hazard we drive every progressive
+// prefix of a document that exercises the hazard through the cache
+// and assert two contracts:
+//
+//  1. The cached stable prefix never contains the hazard. If the
+//     hazard line is at byte offset H, then after every flush
+//     len(sm.stablePrefix) <= H. This is the "no silent
+//     corruption" half — the algorithm cannot accept a boundary
+//     that splits across the hazard.
+//
+//  2. The final flush is visually equivalent to a fresh full
+//     render of the complete document. This is the same T2-style
+//     equivalence assertion ported to the new doc shapes.
+// -----------------------------------------------------------------------
+
+// nonBlankLines returns the non-blank visible lines of s with
+// per-line trailing whitespace trimmed. Used to compare two
+// rendered fragments for content equivalence when paragraph-
+// margin behaviour legitimately differs between a single fresh
+// render and a streaming split render (per F8 design principle D
+// — visual equivalence is the bar, byte-equivalence is not).
+//
+// Some glamour block types (notably HTML blocks and reference
+// link definitions) interact with adjacent paragraph blocks
+// during a single render — adjacency effectively suppresses the
+// blank-line margin between blocks. When the streaming path
+// renders the prefix and trail in separate calls, the seam is
+// re-introduced as a blank line. The visible TEXT is identical;
+// only the inter-block margin differs.
+func nonBlankLines(s string) []string {
+	clean := stripANSI(s)
+	out := make([]string, 0)
+	for _, l := range strings.Split(clean, "\n") {
+		l = strings.TrimRight(l, " \t")
+		if strings.TrimSpace(l) == "" {
+			continue
+		}
+		out = append(out, l)
+	}
+	return out
+}
+
+// runProgressiveBoundaryRespectTest is the shared body of T5/T6/T7.
+// It accepts a document and the byte offset of the line whose
+// PRESENCE in the prefix must trigger the hazard reject; the
+// cached stable prefix may never extend past hazardLineOffset.
+//
+// The final-output equivalence check is content-based (non-blank
+// lines compared) rather than full-normalization: see
+// nonBlankLines for the reason.
+func runProgressiveBoundaryRespectTest(t *testing.T, doc string, hazardLineOffset int) {
+	t.Helper()
+	const width = 80
+	const steps = 25
+
+	renderer := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	prefixes := progressivePrefixes(doc, steps)
+	var lastOut string
+	for i, p := range prefixes {
+		if p == "" {
+			continue
+		}
+		lastOut = sm.Render(p, width, renderer)
+		require.NotEmptyf(t, lastOut, "step %d: empty render", i)
+		require.LessOrEqualf(t, len(sm.stablePrefix), hazardLineOffset,
+			"step %d: cached stable prefix advanced past the hazard line\n"+
+				"prefix len=%d, hazard at %d, sm.stablePrefix=%q",
+			i, len(sm.stablePrefix), hazardLineOffset, sm.stablePrefix)
+	}
+
+	fresh := freshRender(t, doc, width)
+	require.Equal(t, nonBlankLines(fresh), nonBlankLines(lastOut),
+		"final streaming output must contain the same non-blank lines as a fresh full render")
+}
+
+// TestStreamingMarkdown_LooseListContinuation locks in the B1 fix.
+// A loose list followed by a continuation paragraph and then a
+// trailing paragraph creates a candidate boundary between the list
+// item and its continuation; the trailing non-blank line of that
+// candidate prefix is the continuation paragraph (not a list
+// marker), so the line-only check would accept it. The
+// anywhere-in-prefix list-marker check rejects it.
+func TestStreamingMarkdown_LooseListContinuation(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"- item one",
+		"",
+		"  continuation paragraph still belongs to item one",
+		"",
+		"- item two",
+		"",
+		"Trailing paragraph after the list.",
+	}, "\n")
+
+	// The first list marker line begins after "Intro paragraph.\n\n".
+	// The cached stable prefix may include that boundary (BEFORE
+	// the list opens) but must never advance into the list.
+	hazardOffset := strings.Index(doc, "- item one")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_HTMLBlock locks in the B2 fix. A raw HTML
+// block followed by a paragraph creates a candidate boundary
+// between the closed HTML block and the trailing paragraph. The
+// anywhere-in-prefix HTML-opener check rejects any boundary that
+// would include the HTML block in the stable prefix.
+func TestStreamingMarkdown_HTMLBlock(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"<div>",
+		"some block content",
+		"</div>",
+		"",
+		"Trailing paragraph after the HTML block.",
+	}, "\n")
+
+	hazardOffset := strings.Index(doc, "<div>")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_HTMLBlockType7 covers HTML block type 7
+// (CommonMark): a generic open/close tag whose name is NOT in the
+// fixed type-6 set still opens an HTML block and must forfeit any
+// boundary that would split the block off from following content.
+func TestStreamingMarkdown_HTMLBlockType7(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"<custom-tag>",
+		"some block content",
+		"</custom-tag>",
+		"",
+		"Trailing paragraph after the custom-tag block.",
+	}, "\n")
+
+	hazardOffset := strings.Index(doc, "<custom-tag>")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_LinkRefDefinition locks in the B3 fix. A
+// reference link definition followed by a paragraph that uses the
+// reference creates a boundary candidate between the def and the
+// paragraph; rendering them in separate glamour passes loses the
+// definition. The anywhere-in-prefix ref-def check rejects.
+func TestStreamingMarkdown_LinkRefDefinition(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"[ref]: http://example.com",
+		"",
+		"Trailing paragraph that links to [the example][ref] inline.",
+	}, "\n")
+
+	hazardOffset := strings.Index(doc, "[ref]:")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestAssistantStreamingContent_ResetOnClearCache guards the
+// integration contract that ClearItemCaches (style change) drops
+// the streaming-markdown cache. Without this, a style change
+// would leave the OLD style's ANSI sequences embedded in the
+// stable-prefix render and the next flush would visually mix
+// styles.
+func TestAssistantStreamingContent_ResetOnClearCache(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	msg := finishedAssistantMessage("stream-clear", doc)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 80
+	_ = item.RawRender(width)
+	// Drive a second message that extends the content so the
+	// streaming cache has a chance to advance (if it would).
+	doc2 := doc + "\n\nFour."
+	item.SetMessage(finishedAssistantMessage("stream-clear", doc2))
+	_ = item.RawRender(width)
+
+	// Now wipe the caches the way ClearItemCaches does.
+	item.clearCache()
+
+	require.Equal(t, "", item.streamingContent.stablePrefix,
+		"clearCache must Reset the streaming-markdown cache")
+	require.Equal(t, "", item.streamingContent.stablePrefixRender)
+	require.Equal(t, 0, item.streamingContent.width)
+}
diff --git a/internal/ui/chat/streaming_markdown.go b/internal/ui/chat/streaming_markdown.go
new file mode 100644
index 0000000000000000000000000000000000000000..98535a8543cf6b3b99a3372297bcc17094c918e6
--- /dev/null
+++ b/internal/ui/chat/streaming_markdown.go
@@ -0,0 +1,740 @@
+package chat
+
+import (
+	"strings"
+
+	"charm.land/glamour/v2"
+	"github.com/charmbracelet/crush/internal/ui/common"
+)
+
+// streamingMarkdown caches a "stable prefix" glamour render so each
+// streaming flush only re-renders the trailing portion of the
+// document. F8 of docs/notes/2026-05-12-chat-rendering-perf.md.
+//
+// The boundary between "stable" and "trailing" is detected by
+// [findSafeMarkdownBoundary]: a position immediately after a blank
+// line at which we can prove no markdown construct is open
+// (fenced code block, list, table, block quote, setext header).
+//
+// Two renders concatenated are NOT generally equal to a single
+// render of the whole document — glamour's wrap state is reset
+// between calls. The boundary check is therefore deliberately
+// conservative; whenever it has the slightest doubt the call
+// falls back to a full render and the cache is left untouched.
+//
+// Invariants:
+//
+//   - stablePrefix is always a literal byte prefix of the most
+//     recently rendered content. If a new content does not have
+//     stablePrefix as its prefix the cache is dropped.
+//   - stablePrefixRender is the glamour render of stablePrefix
+//     alone, with surrounding whitespace trimmed for clean
+//     concatenation.
+//   - width is the glamour wrap width that produced
+//     stablePrefixRender. A width change drops the cache.
+type streamingMarkdown struct {
+	width              int
+	stablePrefix       string
+	stablePrefixRender string
+}
+
+// Reset drops every cached field. After Reset the next Render call
+// is guaranteed to be a full render.
+func (s *streamingMarkdown) Reset() {
+	s.width = 0
+	s.stablePrefix = ""
+	s.stablePrefixRender = ""
+}
+
+// Render returns the glamour render of content at the given width,
+// reusing the cached stable-prefix render when it is safe to do so.
+// On any uncertainty the call falls back to a full render via
+// renderer and leaves the cache untouched (or drops it).
+//
+// The returned string has its trailing newline trimmed to match
+// the existing renderMarkdown contract on AssistantMessageItem.
+//
+// Concurrency: glamour's Render is stateful and not safe for
+// concurrent invocation on a shared renderer. Crush's TUI is
+// single-threaded so production never contends, but parallel
+// callers (most notably the test suite) must serialize. We hold
+// [common.LockMarkdownRenderer] for the entire prefix +
+// trailing render sequence so other goroutines cannot interleave
+// their own Render calls and corrupt goldmark's BlockStack.
+func (s *streamingMarkdown) Render(content string, width int, renderer *glamour.TermRenderer) string {
+	mu := common.LockMarkdownRenderer(renderer)
+	mu.Lock()
+	defer mu.Unlock()
+	full := func() string {
+		out, err := renderer.Render(content)
+		if err != nil {
+			return content
+		}
+		return strings.TrimSuffix(out, "\n")
+	}
+
+	// Width change OR content not a prefix-extension: drop cache,
+	// full render, optionally try to seed a fresh boundary on this
+	// call (step "f" in the design note).
+	if width != s.width || !strings.HasPrefix(content, s.stablePrefix) {
+		s.Reset()
+		s.width = width
+		out := full()
+		s.tryAdvanceFromEmpty(content, width, renderer)
+		return out
+	}
+
+	boundary := findSafeMarkdownBoundary(content)
+	if boundary < 0 {
+		// No safe boundary anywhere yet. Full render; do not
+		// modify the cache (a future flush may find one).
+		return full()
+	}
+
+	if boundary <= len(s.stablePrefix) {
+		// Cached prefix already covers an at-least-as-late
+		// boundary. Render the trailing partial fresh and glue.
+		trail := content[len(s.stablePrefix):]
+		return glueRenders(s.stablePrefixRender, s.renderTrailing(trail, renderer))
+	}
+
+	// boundary > len(stablePrefix): we have a NEW chunk of safe
+	// content. Render the new chunk, append to stablePrefixRender,
+	// promote the boundary, then render the remaining trail.
+	newChunk := content[len(s.stablePrefix):boundary]
+	newChunkRender := s.renderTrailing(newChunk, renderer)
+	s.stablePrefixRender = glueRenders(s.stablePrefixRender, newChunkRender)
+	s.stablePrefix = content[:boundary]
+
+	trail := content[boundary:]
+	if trail == "" {
+		// boundary == len(content): no trailing content. Returning
+		// the cached prefix render directly is correct.
+		return s.stablePrefixRender
+	}
+	return glueRenders(s.stablePrefixRender, s.renderTrailing(trail, renderer))
+}
+
+// tryAdvanceFromEmpty seeds the cache from a fresh state. We've
+// already paid the cost of a full render of `content`; if there is
+// a safe boundary inside it, render the prefix once more (cheap
+// relative to the full render we just did) and cache it so the
+// next flush can avoid the full work.
+//
+// This is the optional optimisation step "f" from the design
+// note. We render the prefix separately rather than try to
+// recover it from the full render output because two renders
+// concatenated ≠ a single render of the whole, and we prefer the
+// cached prefix render to be byte-for-byte what we'd produce on a
+// future cached call.
+func (s *streamingMarkdown) tryAdvanceFromEmpty(content string, width int, renderer *glamour.TermRenderer) {
+	boundary := findSafeMarkdownBoundary(content)
+	if boundary <= 0 {
+		return
+	}
+	prefix := content[:boundary]
+	out, err := renderer.Render(prefix)
+	if err != nil {
+		return
+	}
+	s.stablePrefix = prefix
+	s.stablePrefixRender = trimGlamourMargins(out)
+	s.width = width
+}
+
+// renderTrailing renders a trailing partial as a fresh glamour
+// document and trims the surrounding whitespace so it can be
+// concatenated to a cached prefix render without doubled blank
+// lines.
+func (s *streamingMarkdown) renderTrailing(text string, renderer *glamour.TermRenderer) string {
+	if text == "" {
+		return ""
+	}
+	out, err := renderer.Render(text)
+	if err != nil {
+		return text
+	}
+	return trimGlamourMargins(out)
+}
+
+// glueRenders concatenates two glamour-rendered fragments with a
+// single blank line separator. Glamour outputs typically carry
+// their own surrounding margins; trimming on both sides and
+// gluing with "\n\n" prevents the visible double-margin seam.
+//
+// Empty fragments are tolerated so the same helper works for the
+// "boundary == len(content)" path where there is no trailing
+// segment.
+func glueRenders(prefix, trail string) string {
+	prefix = trimGlamourMargins(prefix)
+	trail = trimGlamourMargins(trail)
+	switch {
+	case prefix == "" && trail == "":
+		return ""
+	case prefix == "":
+		return trail
+	case trail == "":
+		return prefix
+	default:
+		return prefix + "\n\n" + trail
+	}
+}
+
+// trimGlamourMargins strips leading and trailing whitespace
+// (including newlines) from a glamour-rendered fragment.
+// Glamour adds a leading blank line for documents that open with
+// a heading or paragraph, plus a trailing newline; both must be
+// removed before concatenation.
+func trimGlamourMargins(s string) string {
+	return strings.Trim(s, " \t\n")
+}
+
+// findSafeMarkdownBoundary returns the byte offset of the END of
+// the latest safe boundary in content, i.e. the offset such that
+// content[:boundary] is a valid stable-prefix candidate. The
+// returned offset always points immediately after a blank-line
+// separator, so concatenating a fresh render of content[boundary:]
+// to a cached render of content[:boundary] does not require glamour
+// to share state across the cut.
+//
+// Returns -1 when no safe boundary exists. SAFETY FIRST: any time
+// we have the slightest doubt we return -1 and let the caller fall
+// back to a full render.
+//
+// Decision tree, in order of preference (latest boundary wins):
+//
+//  1. Walk backward through every "blank line" position p such that
+//     content[:p] ends with "\n\n" (or "\n[ \t]*\n").
+//  2. For each candidate, check that content[:p] has an even
+//     number of triple-backtick fence lines (no open fenced
+//     block). Any odd count means we'd be cutting inside a fence
+//     and mis-syntax-highlighting the trailing partial.
+//     2b. Reject if any line in content[:p] (outside fenced blocks)
+//     is a list-marker line, an HTML-block opener, or a link
+//     reference definition. See [prefixHasOpenHazard] for the
+//     reasoning behind these "anywhere in prefix" rejects.
+//  3. Reject if the last non-blank line of content[:p] is:
+//     - a list item marker line ("^\s*([-*+]|\d+\.)\s")
+//     - a table line (contains "|")
+//     - a block quote ("^\s*>")
+//     - a setext header underline ("^=+\s*$" or "^-+\s*$")
+//     - an indented code line (4+ leading spaces or a tab)
+//  4. Reject if the line immediately AFTER the boundary (skipping
+//     leading blank lines) looks like a setext underline (a line
+//     of '=' or '-' only). Rendering the prefix as a paragraph
+//     would change once the underline arrived; that's exactly the
+//     "splitting changes the prefix render" hazard §4.4 calls out.
+//
+// Returns the byte offset of the first character AFTER the blank
+// line, i.e. the start of the trailing segment.
+func findSafeMarkdownBoundary(content string) int {
+	if len(content) == 0 {
+		return -1
+	}
+
+	// Iterate every blank-line position from latest to earliest.
+	for p := blankLineBefore(content, len(content)); p > 0; p = blankLineBefore(content, p-1) {
+		if !isSafeBoundaryAt(content, p) {
+			continue
+		}
+		return p
+	}
+	return -1
+}
+
+// blankLineBefore returns the byte offset of the first character
+// AFTER the latest blank-line separator that ends strictly before
+// `until`. A blank-line separator is a sequence "\n([ \t]*\n)+"
+// — one newline, then one or more lines containing only spaces or
+// tabs and terminated by another newline. The returned offset is
+// the start of the first non-blank line that follows the
+// separator (or the position immediately after the final newline,
+// if no further content remains).
+//
+// Returns -1 when no blank-line separator exists before `until`.
+func blankLineBefore(content string, until int) int {
+	if until <= 0 {
+		return -1
+	}
+	// Walk backward looking for a newline followed (after optional
+	// blank-line content) by another newline. We track the latest
+	// newline we've seen; if the next earlier newline has only
+	// blank chars between them, we have a blank-line separator
+	// and the boundary sits immediately after the latest newline.
+	end := until
+	for end > 0 {
+		nl := strings.LastIndexByte(content[:end], '\n')
+		if nl < 0 {
+			return -1
+		}
+		// Look for an earlier newline whose gap to nl is empty
+		// or whitespace only.
+		prev := strings.LastIndexByte(content[:nl], '\n')
+		for prev >= 0 {
+			gap := content[prev+1 : nl]
+			if isBlankOrSpaces(gap) {
+				return nl + 1
+			}
+			// Gap had non-whitespace; nl is not a blank-line
+			// separator. Move up: try with the earlier newline as
+			// the new "nl" candidate.
+			break
+		}
+		end = nl
+	}
+	return -1
+}
+
+// isBlankOrSpaces reports whether s consists entirely of spaces
+// and tabs (or is empty).
+func isBlankOrSpaces(s string) bool {
+	for i := range len(s) {
+		if s[i] != ' ' && s[i] != '\t' {
+			return false
+		}
+	}
+	return true
+}
+
+// isSafeBoundaryAt reports whether content[:p] is a safe stable
+// prefix. p must be a blank-line boundary (start of a line, with a
+// blank line immediately preceding).
+//
+// Beyond the last-line checks, three "anywhere in the prefix"
+// hazards force a reject because they cannot be reliably reasoned
+// about by inspecting the trailing line alone. For each of these
+// the simplest, safest rule was chosen — see prefixHasOpenHazard.
+func isSafeBoundaryAt(content string, p int) bool {
+	prefix := content[:p]
+
+	// (2) Even number of triple-backtick fence lines.
+	if countFenceLines(prefix)%2 != 0 {
+		return false
+	}
+
+	// (2b) Anywhere-in-prefix hazards: open list (B1), HTML block
+	// opener (B2), reference link definition (B3). Any of these
+	// anywhere in the prefix forces a fallback.
+	if prefixHasOpenHazard(prefix) {
+		return false
+	}
+
+	// (3) Inspect the last non-blank line of the prefix.
+	lastLine := lastNonBlankLine(prefix)
+	if lastLine != "" && lineOpensConstruct(lastLine) {
+		return false
+	}
+
+	// (4) If anything follows, make sure it doesn't look like a
+	// setext underline that would retroactively turn the last
+	// paragraph of the prefix into a header.
+	if rest := content[p:]; rest != "" {
+		first := firstNonBlankLine(rest)
+		if isSetextUnderlineCandidate(first) {
+			return false
+		}
+	}
+
+	return true
+}
+
+// prefixHasOpenHazard reports whether prefix contains any of three
+// constructs that cannot be safely cut at a blank-line boundary
+// even when the immediately preceding line looks fine. Each check
+// uses the SIMPLEST viable conservative rule per the F8 round-2
+// review:
+//
+//	B1 (loose lists). A loose list has a blank line between an item
+//	   and a continuation paragraph that begins with indentation
+//	   but no list marker. If a candidate boundary lands on that
+//	   blank line, the prefix's trailing non-blank line is the
+//	   continuation paragraph, NOT a list marker, so the last-line
+//	   check would accept it even though the list is still open.
+//
+//	   Rule chosen: any list-marker line ANYWHERE in the prefix
+//	   forces -1. This is overly conservative — it forfeits
+//	   boundary advancement past a closed list — but it eliminates
+//	   the entire bug class with zero parsing of CommonMark's
+//	   loose-list closure semantics. We retain the most useful
+//	   boundary in practice: the one BEFORE the list opens (no
+//	   marker has appeared in the prefix yet).
+//
+//	B2 (HTML blocks). CommonMark defines seven HTML-block opener
+//	   patterns (script/pre/style/textarea, comments, processing
+//	   instructions, CDATA, declarations, recognised tag names).
+//	   If the prefix opens an HTML block that the suffix closes,
+//	   splitting renders the prefix as raw HTML and the suffix as
+//	   prose.
+//
+//	   Rule chosen: any HTML-block opener anywhere in the prefix
+//	   forces -1. Same trade-off as B1 — the typical assistant
+//	   output contains no raw HTML, so the perf cost is zero in
+//	   the common case.
+//
+//	B3 (reference link definitions). A line of the form
+//	   "[label]: <url>" defines a link reference that the suffix
+//	   may later use as "[text][label]". Splitting the document
+//	   loses the definition because each half is rendered as an
+//	   independent glamour document.
+//
+//	   Rule chosen: any reference link definition line anywhere in
+//	   the prefix forces -1. Suffix-side reference detection is
+//	   fragile (three syntaxes: [text][label], [label][], [label]),
+//	   so the prefix-side check is the simpler safe choice.
+//
+// All three rules accept the perf hit of "no boundary after a
+// list / HTML block / link def" in exchange for guaranteed
+// soundness. If profiling shows this kills the F8 win on real
+// streaming traces, the next iteration can promote each rule to
+// its less-conservative variant (closure-aware list tracking,
+// per-tag HTML close detection, suffix-aware ref tracking).
+func prefixHasOpenHazard(prefix string) bool {
+	inFence := false
+	for line := range splitLines(prefix) {
+		// Track fenced state so list/html/ref patterns inside a
+		// fenced code block do not falsely trigger the hazards.
+		if isFenceLine(line) {
+			inFence = !inFence
+			continue
+		}
+		if inFence {
+			continue
+		}
+		trimmed := strings.TrimLeft(line, " \t")
+		if trimmed == "" {
+			continue
+		}
+		// B1: any list-item marker.
+		if isListItemMarker(trimmed) {
+			return true
+		}
+		// B2: HTML block opener.
+		if isHTMLBlockOpener(line) {
+			return true
+		}
+		// B3: link reference definition.
+		if isLinkRefDefinition(line) {
+			return true
+		}
+	}
+	return false
+}
+
+// countFenceLines counts lines that begin a fenced code block in
+// the CommonMark sense: a line whose first non-whitespace run is
+// at least three consecutive backticks (or tildes). Each such
+// line toggles the fenced state, so an even count means every
+// opened fence has been closed.
+//
+// We accept up to three leading spaces of indentation (CommonMark
+// rule) and require the fence characters to be the FIRST
+// non-whitespace content of the line. We deliberately do NOT
+// attempt to parse info-strings or differentiate opener from
+// closer beyond toggling — a closing fence is just any line
+// whose first non-whitespace run is >=3 of the same fence char.
+func countFenceLines(s string) int {
+	n := 0
+	for line := range splitLines(s) {
+		if isFenceLine(line) {
+			n++
+		}
+	}
+	return n
+}
+
+// isFenceLine reports whether line opens or closes a fenced code
+// block.
+func isFenceLine(line string) bool {
+	// Strip up to 3 spaces of indentation.
+	i := 0
+	for i < len(line) && i < 3 && line[i] == ' ' {
+		i++
+	}
+	if i >= len(line) {
+		return false
+	}
+	c := line[i]
+	if c != '`' && c != '~' {
+		return false
+	}
+	run := 0
+	for i < len(line) && line[i] == c {
+		i++
+		run++
+	}
+	return run >= 3
+}
+
+// lastNonBlankLine returns the last non-blank line of s, or ""
+// when every line is blank.
+func lastNonBlankLine(s string) string {
+	last := ""
+	for line := range splitLines(s) {
+		if strings.TrimSpace(line) != "" {
+			last = line
+		}
+	}
+	return last
+}
+
+// firstNonBlankLine returns the first non-blank line of s, or ""
+// when every line is blank.
+func firstNonBlankLine(s string) string {
+	for line := range splitLines(s) {
+		if strings.TrimSpace(line) != "" {
+			return line
+		}
+	}
+	return ""
+}
+
+// splitLines yields the lines of s without their terminators. The
+// final segment is yielded even if not newline-terminated.
+func splitLines(s string) func(yield func(string) bool) {
+	return func(yield func(string) bool) {
+		start := 0
+		for i := 0; i < len(s); i++ {
+			if s[i] == '\n' {
+				if !yield(s[start:i]) {
+					return
+				}
+				start = i + 1
+			}
+		}
+		if start <= len(s)-1 {
+			yield(s[start:])
+		}
+	}
+}
+
+// lineOpensConstruct reports whether line keeps a markdown
+// construct open across the boundary. We err conservatively —
+// any case that smells like list/table/quote/setext/indented-code
+// returns true.
+func lineOpensConstruct(line string) bool {
+	// Indented code: a tab, or 4+ leading spaces.
+	if len(line) > 0 && line[0] == '\t' {
+		return true
+	}
+	if strings.HasPrefix(line, "    ") {
+		return true
+	}
+
+	trimmed := strings.TrimLeft(line, " \t")
+	if trimmed == "" {
+		return false
+	}
+
+	// Block quote.
+	if trimmed[0] == '>' {
+		return true
+	}
+
+	// List item: "- " "* " "+ " or "<digits>. " or "<digits>) ".
+	if isListItemMarker(trimmed) {
+		return true
+	}
+
+	// Table: any pipe character anywhere in the line. Conservative:
+	// pipe-in-prose is rare and the cost of bailing is one slow
+	// frame.
+	if strings.ContainsRune(line, '|') {
+		return true
+	}
+
+	// Setext underline candidate as the LAST line of the prefix:
+	// this would be a setext header for an even-earlier paragraph.
+	// Refuse to split at all in this case — the boundary is right
+	// in the middle of a header.
+	if isSetextUnderlineCandidate(trimmed) {
+		return true
+	}
+
+	return false
+}
+
+// isListItemMarker reports whether line (already left-trimmed)
+// starts with a CommonMark list-item marker followed by a space
+// or tab.
+func isListItemMarker(line string) bool {
+	if line == "" {
+		return false
+	}
+	c := line[0]
+	if c == '-' || c == '*' || c == '+' {
+		if len(line) >= 2 && (line[1] == ' ' || line[1] == '\t') {
+			return true
+		}
+		return false
+	}
+	// Ordered list: digits followed by '.' or ')' and a space.
+	i := 0
+	for i < len(line) && line[i] >= '0' && line[i] <= '9' {
+		i++
+	}
+	if i == 0 || i > 9 {
+		return false
+	}
+	if i >= len(line) {
+		return false
+	}
+	if line[i] != '.' && line[i] != ')' {
+		return false
+	}
+	if i+1 >= len(line) {
+		return false
+	}
+	return line[i+1] == ' ' || line[i+1] == '\t'
+}
+
+// isSetextUnderlineCandidate reports whether line (with optional
+// leading whitespace) consists entirely of '=' or entirely of '-'
+// characters with optional trailing whitespace. CommonMark
+// requires no leading whitespace on the underline; we accept up
+// to three spaces for safety so an indented underline still
+// blocks a split.
+func isSetextUnderlineCandidate(line string) bool {
+	// Strip leading whitespace.
+	i := 0
+	for i < len(line) && (line[i] == ' ' || line[i] == '\t') {
+		i++
+	}
+	if i == len(line) {
+		return false
+	}
+	c := line[i]
+	if c != '=' && c != '-' {
+		return false
+	}
+	j := i
+	for j < len(line) && line[j] == c {
+		j++
+	}
+	// Allow trailing whitespace.
+	for j < len(line) {
+		if line[j] != ' ' && line[j] != '\t' {
+			return false
+		}
+		j++
+	}
+	// Need at least one underline character. "-" alone is also a
+	// list marker without a trailing space; the listItem check
+	// covers the marker case before we get here.
+	return j-i >= 1
+}
+
+// isHTMLBlockOpener reports whether line begins one of the seven
+// CommonMark HTML block patterns. We accept up to three spaces of
+// leading indentation (CommonMark rule). Matching is intentionally
+// loose — we only need to know the line "looks like an HTML
+// block start", not parse the contained markup.
+func isHTMLBlockOpener(line string) bool {
+	// Strip up to 3 spaces of indentation.
+	i := 0
+	for i < len(line) && i < 3 && line[i] == ' ' {
+		i++
+	}
+	rest := line[i:]
+	if len(rest) < 2 || rest[0] != '<' {
+		return false
+	}
+
+	// Type 2: HTML comment "<!--".
+	if strings.HasPrefix(rest, "<!--") {
+		return true
+	}
+	// Type 3: processing instruction "<?".
+	if strings.HasPrefix(rest, "<?") {
+		return true
+	}
+	// Type 5: CDATA "<![CDATA[".
+	if strings.HasPrefix(rest, "<![CDATA[") {
+		return true
+	}
+	// Type 4: declaration "<!" followed by an ASCII letter.
+	if len(rest) >= 3 && rest[1] == '!' && isASCIILetter(rest[2]) {
+		return true
+	}
+
+	// Type 1: <script | <pre | <style | <textarea (case-insensitive)
+	// followed by whitespace, '>', end-of-line, or other non-name
+	// terminators. Use a permissive HasPrefix check on lowercase.
+	low := strings.ToLower(rest)
+	for _, t := range []string{"<script", "<pre", "<style", "<textarea"} {
+		if strings.HasPrefix(low, t) {
+			next := byte(0)
+			if len(low) > len(t) {
+				next = low[len(t)]
+			}
+			if next == 0 || next == ' ' || next == '\t' || next == '>' {
+				return true
+			}
+		}
+	}
+
+	// Types 6 & 7: open or close of a block-level tag.
+	//
+	// Type 6 matches a fixed CommonMark tag set; type 7 matches any
+	// otherwise-valid open/close tag whose name is not in the
+	// script/pre/style/textarea family. We collapse both into a
+	// single check: the line must start with '<' or '</' followed
+	// by an ASCII letter. This deliberately mirrors the other
+	// hazards — when in doubt, forfeit the boundary. Lines like
+	// "<3", "<-", "<<", or mid-line "<foo>" do NOT trigger because
+	// we require the line to *start* (after up to 3 spaces) with
+	// '<letter' or '</letter'.
+	j := 1 // past '<'
+	if j < len(rest) && rest[j] == '/' {
+		j++
+	}
+	if j >= len(rest) || !isASCIILetter(rest[j]) {
+		return false
+	}
+	return true
+}
+
+// isASCIILetter reports whether b is an ASCII letter.
+func isASCIILetter(b byte) bool {
+	return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')
+}
+
+// isLinkRefDefinition reports whether line matches a CommonMark
+// link reference definition opener. The conservative pattern:
+//
+//	^[ ]{0,3}\[[^\]]+\]:\s*\S+
+//
+// i.e. up to 3 spaces, then a bracketed label (no nested ']'),
+// then a colon, then whitespace, then at least one non-whitespace
+// character of destination. We do not validate the destination —
+// presence of a ref-def opener anywhere in the prefix is enough
+// to forfeit the boundary.
+func isLinkRefDefinition(line string) bool {
+	i := 0
+	for i < len(line) && i < 3 && line[i] == ' ' {
+		i++
+	}
+	if i >= len(line) || line[i] != '[' {
+		return false
+	}
+	i++
+	labelStart := i
+	for i < len(line) && line[i] != ']' {
+		i++
+	}
+	if i >= len(line) || i == labelStart {
+		// No closing bracket, or empty label.
+		return false
+	}
+	// i points at ']'.
+	i++
+	if i >= len(line) || line[i] != ':' {
+		return false
+	}
+	i++
+	// Skip required whitespace.
+	for i < len(line) && (line[i] == ' ' || line[i] == '\t') {
+		i++
+	}
+	// At least one non-whitespace character of destination.
+	return i < len(line)
+}
diff --git a/internal/ui/chat/tools.go b/internal/ui/chat/tools.go
index 5855cbe2698e1defe4a5c8800ccbb6c71356c926..cac2e227aeab5b12c9e99c5777e0e2f810462ee3 100644
--- a/internal/ui/chat/tools.go
+++ b/internal/ui/chat/tools.go
@@ -1028,7 +1028,10 @@ func toolOutputMarkdownContent(sty *styles.Styles, content string, width int, ex
 	}
 
 	renderer := common.QuietMarkdownRenderer(sty, width)
+	mu := common.LockMarkdownRenderer(renderer)
+	mu.Lock()
 	rendered, err := renderer.Render(content)
+	mu.Unlock()
 	if err != nil {
 		return toolOutputPlainContent(sty, content, width, expanded)
 	}
diff --git a/internal/ui/chat/user.go b/internal/ui/chat/user.go
index 0f11c00a68f7aec9013ce2d80b73e0ab4df9c1d9..2aca4495b2dd15e115416a564daa072c4b1421a8 100644
--- a/internal/ui/chat/user.go
+++ b/internal/ui/chat/user.go
@@ -57,7 +57,10 @@ func (m *UserMessageItem) RawRender(width int) string {
 	renderer := common.MarkdownRenderer(m.sty, cappedWidth)
 
 	msgContent := strings.TrimSpace(m.message.Content().Text)
+	mu := common.LockMarkdownRenderer(renderer)
+	mu.Lock()
 	result, err := renderer.Render(msgContent)
+	mu.Unlock()
 	if err != nil {
 		content = msgContent
 	} else {
diff --git a/internal/ui/common/markdown.go b/internal/ui/common/markdown.go
index 8ba45bccd263350607763af4813f2b6b047a1d7f..a2a2d3d22ad10ff6cce5f0483d43a33405266cde 100644
--- a/internal/ui/common/markdown.go
+++ b/internal/ui/common/markdown.go
@@ -19,6 +19,12 @@ func init() {
 	formatters.Register(formatterName, xchroma.Formatter(zero, nil))
 }
 
+// mdCacheMu guards mdCache and quietMDCache.
+//
+// Lock ordering: when both mdCacheMu and rendererLocksMu are
+// needed (only in InvalidateMarkdownRendererCache), acquire
+// mdCacheMu FIRST, then rendererLocksMu. No other call site may
+// hold rendererLocksMu while acquiring mdCacheMu.
 var (
 	mdCacheMu    sync.Mutex
 	mdCache      = map[int]*glamour.TermRenderer{}
@@ -29,6 +35,13 @@ var (
 // the given styles and width. Renderers are memoized per width and shared
 // across callers; call InvalidateMarkdownRendererCache when the active
 // styles change.
+//
+// The returned renderer is NOT safe for concurrent Render calls
+// (goldmark's BlockStack carries state across the public Render
+// API). Crush's TUI is single-threaded so production never
+// contends, but parallel callers (most notably parallel tests)
+// must serialize via [LockMarkdownRenderer]. Treat the renderer
+// as effectively pinned to one goroutine at a time.
 func MarkdownRenderer(sty *styles.Styles, width int) *glamour.TermRenderer {
 	mdCacheMu.Lock()
 	defer mdCacheMu.Unlock()
@@ -46,7 +59,8 @@ func MarkdownRenderer(sty *styles.Styles, width int) *glamour.TermRenderer {
 
 // QuietMarkdownRenderer returns a glamour [glamour.TermRenderer] with no colors
 // (plain text with structure) and the given width. Renderers are memoized per
-// width and shared across callers.
+// width and shared across callers. Same concurrency contract as
+// [MarkdownRenderer]: serialize via [LockMarkdownRenderer].
 func QuietMarkdownRenderer(sty *styles.Styles, width int) *glamour.TermRenderer {
 	mdCacheMu.Lock()
 	defer mdCacheMu.Unlock()
@@ -62,12 +76,64 @@ func QuietMarkdownRenderer(sty *styles.Styles, width int) *glamour.TermRenderer
 	return r
 }
 
-// InvalidateMarkdownRendererCache drops every cached renderer. Call this
-// whenever the active styles change so subsequent renderers pick up the new
-// ansi.StyleConfig.
+// InvalidateMarkdownRendererCache drops every cached renderer
+// AND every per-renderer mutex in a single atomic critical
+// section so the two maps cannot disagree mid-toggle. Call this
+// whenever the active styles change so subsequent renderers
+// pick up the new ansi.StyleConfig.
+//
+// Existing holders of an old mutex (mid-Render goroutines) keep
+// their reference safely; new renderers minted after the
+// invalidation get freshly minted mutexes.
+//
+// Lock ordering: mdCacheMu is acquired first, then
+// rendererLocksMu — see the comments on each mutex.
 func InvalidateMarkdownRendererCache() {
 	mdCacheMu.Lock()
 	defer mdCacheMu.Unlock()
+	rendererLocksMu.Lock()
+	defer rendererLocksMu.Unlock()
+
 	mdCache = map[int]*glamour.TermRenderer{}
 	quietMDCache = map[int]*glamour.TermRenderer{}
+	rendererLocks = map[*glamour.TermRenderer]*sync.Mutex{}
+}
+
+// rendererLocksMu guards rendererLocks. We key per-renderer
+// mutexes by pointer so the lock granularity matches the
+// renderer cache granularity (one mutex per (width, palette)
+// renderer instance, not one mutex for the entire cache).
+//
+// Lock ordering: when both mdCacheMu and rendererLocksMu are
+// needed (only in InvalidateMarkdownRendererCache), acquire
+// mdCacheMu FIRST, then rendererLocksMu.
+var (
+	rendererLocksMu sync.Mutex
+	rendererLocks   = map[*glamour.TermRenderer]*sync.Mutex{}
+)
+
+// LockMarkdownRenderer returns the per-renderer mutex used to
+// serialize concurrent Render calls on a shared
+// [glamour.TermRenderer] instance. The returned [*sync.Mutex] is
+// stable for the lifetime of the renderer (i.e. until
+// [InvalidateMarkdownRendererCache] is called).
+//
+// Callers that issue more than one Render call in the same
+// logical operation should hold the mutex for the entire
+// sequence so other goroutines do not interleave their own
+// Render calls and corrupt the renderer state. F8's
+// streamingMarkdown is the immediate consumer; other call
+// sites that today issue exactly one Render call per item
+// render are safe without locking under the single-threaded
+// TUI Update loop, but should adopt this lock if they ever run
+// in parallel (e.g. background prerender workers).
+func LockMarkdownRenderer(r *glamour.TermRenderer) *sync.Mutex {
+	rendererLocksMu.Lock()
+	defer rendererLocksMu.Unlock()
+	if mu, ok := rendererLocks[r]; ok {
+		return mu
+	}
+	mu := &sync.Mutex{}
+	rendererLocks[r] = mu
+	return mu
 }

From 65f30a13430a93ef3f271adb3cee4b535ade3de6 Mon Sep 17 00:00:00 2001
From: Christian Rocha <christian@rocha.is>
Date: Tue, 12 May 2026 16:37:51 -0400
Subject: [PATCH 08/10] perf(chat): skip re-parsing the rendered chat when
 nothing has changed

Each frame the chat list is turned into a string with ANSI color codes
that we then have to parse back into cells before drawing. When the
chat list output is the same as the previous frame, we now reuse the
cells we already produced and skip the parsing step.

Co-Authored-By: Charm Crush <crush@charm.land>
---
 internal/ui/model/chat.go                 | 113 +++++++-
 internal/ui/model/chat_draw_cache_test.go | 334 ++++++++++++++++++++++
 2 files changed, 446 insertions(+), 1 deletion(-)
 create mode 100644 internal/ui/model/chat_draw_cache_test.go

diff --git a/internal/ui/model/chat.go b/internal/ui/model/chat.go
index 177daf2570b1f2738ba596b43390df99c8550d7f..3b717d2644e3225239942aa7b6c3745f42d77745 100644
--- a/internal/ui/model/chat.go
+++ b/internal/ui/model/chat.go
@@ -63,6 +63,32 @@ type Chat struct {
 	// follow is a flag to indicate whether the view should auto-scroll to
 	// bottom on new messages.
 	follow bool
+
+	// drawCache memoizes the decoded form of the last list.Render output so
+	// repeat frames with byte-identical content skip the per-cell ANSI
+	// reparse that uv.StyledString.Draw performs every call. See F9
+	// (docs/notes/2026-05-12-chat-rendering-perf.md §4.8). Bounded to one
+	// entry; invalidated implicitly by string inequality on the next Draw.
+	drawCache *chatDrawCache
+}
+
+// chatDrawCache holds the pre-decoded form of the last list.Render output.
+// The cache is keyed by the rendered string and the screen's width method
+// (graphemes vs wcwidth pick different decoders inside ultraviolet's
+// printString, so a cached buffer is only valid for the method it was
+// decoded with). The cached buffer is independent of the draw area, so
+// resize / scroll changes that produce the same string still hit. We cannot
+// use uv.StyledString.Lines because it bottoms out at the first iteration
+// against a zero-bounds rectangle (see ultraviolet styled.go line 45 — the
+// shared printString loop's `y >= bounds.Max.Y` exit applies to the
+// line-building branch too). A Buffer of the rendered text's natural
+// dimensions is the cheapest correct shape: StyledString.Draw runs once on
+// miss to populate it, and Buffer.Draw is an O(cells) cell copy with no
+// ANSI re-parse on hit.
+type chatDrawCache struct {
+	rendered string
+	method   ansi.Method
+	buf      uv.ScreenBuffer
 }
 
 // NewChat creates a new instance of [Chat] that handles chat interactions and
@@ -89,8 +115,93 @@ func (m *Chat) Height() int {
 }
 
 // Draw renders the chat UI component to the screen and the given area.
+//
+// The list's rendered output is cached in decoded form (see chatDrawCache) so
+// that frames with byte-identical content skip the ANSI reparse that
+// uv.StyledString.Draw performs on every call. The cache is keyed by the
+// rendered string and the screen's width method; area / scroll changes do not
+// invalidate it.
 func (m *Chat) Draw(scr uv.Screen, area uv.Rectangle) {
-	uv.NewStyledString(m.list.Render()).Draw(scr, area)
+	rendered := m.list.Render()
+	method, ok := scr.WidthMethod().(ansi.Method)
+	if !ok {
+		// Width method isn't an ansi.Method (unlikely in practice — both
+		// TerminalScreen and ScreenBuffer store ansi.Method). Fall back
+		// to the uncached path so behavior matches upstream exactly.
+		uv.NewStyledString(rendered).Draw(scr, area)
+		return
+	}
+	if m.drawCache == nil ||
+		m.drawCache.rendered != rendered ||
+		m.drawCache.method != method {
+		m.drawCache = newChatDrawCache(rendered, method)
+	}
+	drawCachedBuffer(scr, area, m.drawCache.buf)
+}
+
+// newChatDrawCache builds a chatDrawCache for the given rendered string by
+// running uv.StyledString.Draw into a fresh buffer sized to the text's
+// natural bounds under the active width method. This is the only place
+// ANSI decoding happens for cached frames — subsequent draws reuse buf
+// via drawCachedBuffer.
+//
+// We can't use uv.StyledString.Bounds() here: it is hard-coded to
+// ansi.GraphemeWidth, while StyledString.Draw lays cells using the
+// destination buffer's WidthMethod (which we capture in `method`). For
+// strings where graphemes and wcwidth disagree (emoji ZWJ sequences,
+// some CJK, certain combining marks) the two answers diverge, leaving
+// the cached buffer either too small (trailing cells dropped on hit) or
+// too large (dead cells past the live content). Computing dimensions
+// with `method.StringWidth` per line matches what printString tallies
+// cell-by-cell, since both decode ANSI sequences and use the same width
+// method.
+func newChatDrawCache(rendered string, method ansi.Method) *chatDrawCache {
+	w, h := renderedBounds(rendered, method)
+	if w <= 0 {
+		w = 1
+	}
+	if h <= 0 {
+		h = 1
+	}
+	buf := uv.NewScreenBuffer(w, h)
+	buf.Method = method
+	uv.NewStyledString(rendered).Draw(buf, buf.Bounds())
+	return &chatDrawCache{
+		rendered: rendered,
+		method:   method,
+		buf:      buf,
+	}
+}
+
+// renderedBounds returns the (width, height) cell extent of rendered
+// when laid out by method. Width is the widest line's StringWidth (which
+// strips ANSI sequences and tallies cells via method, exactly like
+// printString); height is the line count. Both match what
+// uv.StyledString.Draw will write into a buffer whose WidthMethod is
+// method, so the cache buffer is always sized to fit the live content.
+func renderedBounds(rendered string, method ansi.Method) (w, h int) {
+	for line := range strings.SplitSeq(rendered, "\n") {
+		w = max(w, method.StringWidth(line))
+		h++
+	}
+	return w, h
+}
+
+// drawCachedBuffer blits a previously-decoded buffer into scr at area,
+// mirroring uv.StyledString.Draw's screen-mode behavior for the default
+// Wrap=false, Tail="" case. The clear loop matches StyledString.Draw line
+// 51-56; the buf.Draw call replaces the per-cell ANSI decode that
+// printString does on every uncached frame with a pure cell copy.
+func drawCachedBuffer(scr uv.Screen, area uv.Rectangle, buf uv.ScreenBuffer) {
+	// Clear the area first to match StyledString.Draw — leftover cells
+	// from a previous frame outside the new content must be zeroed,
+	// because Buffer.Draw skips empty cells (it doesn't clear).
+	for y := area.Min.Y; y < area.Max.Y; y++ {
+		for x := area.Min.X; x < area.Max.X; x++ {
+			scr.SetCell(x, y, nil)
+		}
+	}
+	buf.Draw(scr, area)
 }
 
 // SetSize sets the size of the chat view port.
diff --git a/internal/ui/model/chat_draw_cache_test.go b/internal/ui/model/chat_draw_cache_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..ac422364716e3a1f35baa4e252d1137c38bdf350
--- /dev/null
+++ b/internal/ui/model/chat_draw_cache_test.go
@@ -0,0 +1,334 @@
+package model
+
+import (
+	"strconv"
+	"testing"
+
+	"github.com/charmbracelet/crush/internal/ui/chat"
+	uv "github.com/charmbracelet/ultraviolet"
+	"github.com/charmbracelet/x/ansi"
+	"github.com/stretchr/testify/require"
+)
+
+// drawTestArea is a fixed area used by the cache tests; it must be at least
+// as large as the test items so we can compare every cell.
+func drawTestArea(width, height int) uv.Rectangle {
+	return uv.Rect(0, 0, width, height)
+}
+
+// renderToBuffer mirrors what Chat.Draw does at the screen layer so tests
+// can assert byte-equivalence with a fresh uv.NewStyledString render.
+func renderToBuffer(t *testing.T, c *Chat, w, h int) string {
+	t.Helper()
+	scr := uv.NewScreenBuffer(w, h)
+	c.Draw(scr, drawTestArea(w, h))
+	return scr.Render()
+}
+
+// TestChatDrawCache_HitOnIdenticalRender asserts that two consecutive Draws
+// against the same list output reuse the same chatDrawCache rather than
+// allocating a fresh one. Both the wrapper pointer and the embedded
+// RenderBuffer pointer must be stable across the second Draw — that's what
+// proves no re-decoding happened.
+func TestChatDrawCache_HitOnIdenticalRender(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	u.chat.SetMessages(
+		testMessageItem{id: "a", text: "alpha"},
+		testMessageItem{id: "b", text: "beta"},
+	)
+	u.updateLayoutAndSize()
+
+	w, h := 80, 20
+	_ = renderToBuffer(t, u.chat, w, h)
+	require.NotNil(t, u.chat.drawCache, "first draw should populate cache")
+	firstCache := u.chat.drawCache
+	firstBuf := u.chat.drawCache.buf.RenderBuffer
+
+	_ = renderToBuffer(t, u.chat, w, h)
+	require.Same(t, firstCache, u.chat.drawCache,
+		"identical rendered string must reuse the same cache pointer")
+	require.Same(t, firstBuf, u.chat.drawCache.buf.RenderBuffer,
+		"identical rendered string must reuse the same RenderBuffer pointer")
+}
+
+// TestChatDrawCache_MissOnDifferentRender asserts that when the list output
+// changes, the cache is rebuilt and the new draw output matches a fresh
+// uv.NewStyledString render byte-for-byte.
+func TestChatDrawCache_MissOnDifferentRender(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	u.chat.SetMessages(
+		testMessageItem{id: "a", text: "alpha"},
+	)
+	u.updateLayoutAndSize()
+
+	w, h := 80, 20
+	_ = renderToBuffer(t, u.chat, w, h)
+	require.NotNil(t, u.chat.drawCache)
+	firstCache := u.chat.drawCache
+
+	// Replace the items so the rendered string differs.
+	u.chat.SetMessages(
+		testMessageItem{id: "c", text: "gamma delta"},
+	)
+	u.updateLayoutAndSize()
+
+	got := renderToBuffer(t, u.chat, w, h)
+	require.NotSame(t, firstCache, u.chat.drawCache,
+		"changed rendered string must replace the cache entry")
+
+	// Output must match a fresh uv.NewStyledString render of the current
+	// list output for the same area. This is the byte-equivalence guard
+	// that protects against blit drift from StyledString.Draw.
+	want := freshStyledRender(u.chat.list.Render(), w, h)
+	require.Equal(t, want, got)
+}
+
+// TestChatDrawCache_ReusedAcrossDifferentArea asserts that the cached
+// decoded lines do not depend on the screen / area: the same string drawn
+// into a smaller area must hit the cache, and the output must match a fresh
+// uv.NewStyledString render against the new area.
+func TestChatDrawCache_ReusedAcrossDifferentArea(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	u.chat.SetMessages(
+		testMessageItem{id: "a", text: "alpha"},
+		testMessageItem{id: "b", text: "beta"},
+	)
+	u.updateLayoutAndSize()
+
+	w, h := 80, 20
+	_ = renderToBuffer(t, u.chat, w, h)
+	require.NotNil(t, u.chat.drawCache)
+	firstCache := u.chat.drawCache
+	firstBuf := u.chat.drawCache.buf.RenderBuffer
+
+	// Draw the same content into a smaller buffer. The list output is
+	// width-sensitive in production (list.Render depends on width), so we
+	// hold the list width fixed via updateLayoutAndSize and only shrink
+	// the destination area. The cache key is the rendered string + width
+	// method, both unchanged here.
+	smallScr := uv.NewScreenBuffer(40, 10)
+	u.chat.Draw(smallScr, uv.Rect(0, 0, 40, 10))
+
+	require.Same(t, firstCache, u.chat.drawCache,
+		"smaller area must still hit the cache")
+	require.Same(t, firstBuf, u.chat.drawCache.buf.RenderBuffer,
+		"cached RenderBuffer must be reused across area changes")
+
+	want := freshStyledRender(u.chat.list.Render(), 40, 10)
+	require.Equal(t, want, smallScr.Render())
+}
+
+// TestChatDrawCache_BoundedSize asserts the draw cache holds at most one
+// entry. This is structural — any future change that introduces an LRU or
+// ring would have to update this test.
+func TestChatDrawCache_BoundedSize(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	w, h := 80, 20
+
+	// Cycle through several distinct list outputs and confirm that
+	// drawCache is still a single *chatDrawCache pointing at the most
+	// recent rendered string each time.
+	for i := range 5 {
+		u.chat.SetMessages(
+			testMessageItem{id: "x", text: "tick " + strconv.Itoa(i)},
+		)
+		u.updateLayoutAndSize()
+		_ = renderToBuffer(t, u.chat, w, h)
+		require.NotNil(t, u.chat.drawCache)
+		require.Equal(t, u.chat.list.Render(), u.chat.drawCache.rendered,
+			"cache.rendered must always match the most recent list output")
+	}
+
+	// Sanity: the type of drawCache is a single pointer, not a slice/map.
+	// This is enforced at compile time but checking it explicitly here
+	// keeps the bounded-size invariant visible to future readers.
+	require.IsType(t, (*chatDrawCache)(nil), u.chat.drawCache)
+}
+
+// freshStyledRender renders the same string through uv.NewStyledString into
+// a fresh ScreenBuffer of the given size. It's the reference implementation
+// used by the cache tests to detect blit drift.
+func freshStyledRender(s string, w, h int) string {
+	scr := uv.NewScreenBuffer(w, h)
+	uv.NewStyledString(s).Draw(scr, uv.Rect(0, 0, w, h))
+	return scr.Render()
+}
+
+// TestChatDrawCache_InvalidatedByWidthMethodSwap asserts the cache rebuilds
+// when the destination screen's width method changes between frames.
+// GraphemeWidth and WcWidth disagree on emoji ZWJ sequences and other
+// modern unicode, so the decoded buffer is only valid for the method it
+// was decoded with. Reusing across methods would corrupt cell widths.
+func TestChatDrawCache_InvalidatedByWidthMethodSwap(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	// Use content where the two methods disagree so any accidental
+	// reuse would surface as a different rendered cell layout. The
+	// woman-technologist ZWJ sequence is one cell under GraphemeWidth
+	// but two under WcWidth (the components combine).
+	u.chat.SetMessages(
+		testMessageItem{id: "a", text: "hi 👩\u200d💻 there"},
+	)
+	u.updateLayoutAndSize()
+
+	w, h := 80, 5
+
+	// First draw under GraphemeWidth.
+	scrA := uv.ScreenBuffer{
+		RenderBuffer: uv.NewRenderBuffer(w, h),
+		Method:       ansi.GraphemeWidth,
+	}
+	u.chat.Draw(scrA, uv.Rect(0, 0, w, h))
+	require.NotNil(t, u.chat.drawCache)
+	require.Equal(t, ansi.GraphemeWidth, u.chat.drawCache.method)
+	firstCache := u.chat.drawCache
+
+	// Same string, swap the method. The cache key includes method, so
+	// this must rebuild even though `rendered` is byte-identical.
+	scrB := uv.ScreenBuffer{
+		RenderBuffer: uv.NewRenderBuffer(w, h),
+		Method:       ansi.WcWidth,
+	}
+	u.chat.Draw(scrB, uv.Rect(0, 0, w, h))
+	require.NotSame(t, firstCache, u.chat.drawCache,
+		"width method change must invalidate the cache")
+	require.Equal(t, ansi.WcWidth, u.chat.drawCache.method)
+}
+
+// fixedMethodScreen wraps a uv.ScreenBuffer but reports a custom (non
+// ansi.Method) WidthMethod implementation. Chat.Draw is expected to
+// detect the type-assertion miss and fall through to the uncached path.
+type fixedMethodScreen struct {
+	uv.ScreenBuffer
+	method uv.WidthMethod
+}
+
+func (s fixedMethodScreen) WidthMethod() uv.WidthMethod { return s.method }
+
+// customWidth is a WidthMethod whose concrete type is intentionally NOT
+// ansi.Method, so the type assertion in Chat.Draw fails. The actual
+// width math just delegates to ansi.GraphemeWidth so the rendered
+// output is well-defined and comparable.
+type customWidth struct{}
+
+func (customWidth) StringWidth(s string) int {
+	return ansi.GraphemeWidth.StringWidth(s)
+}
+
+// TestChatDrawCache_FallbackOnNonAnsiMethod asserts that when the screen
+// reports a WidthMethod whose concrete type is not ansi.Method, Chat.Draw
+// skips the cache (it has no comparable key for an arbitrary interface)
+// and falls through to a direct uv.NewStyledString.Draw, producing the
+// same output as the upstream uncached path.
+func TestChatDrawCache_FallbackOnNonAnsiMethod(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	u.chat.SetMessages(
+		testMessageItem{id: "a", text: "alpha"},
+		testMessageItem{id: "b", text: "beta"},
+	)
+	u.updateLayoutAndSize()
+
+	w, h := 40, 10
+	scr := fixedMethodScreen{
+		ScreenBuffer: uv.ScreenBuffer{
+			RenderBuffer: uv.NewRenderBuffer(w, h),
+			Method:       ansi.GraphemeWidth,
+		},
+		method: customWidth{},
+	}
+	// Sanity: the wrapper actually returns a non-ansi.Method type so
+	// the fallback path is exercised end-to-end.
+	_, isAnsiMethod := scr.WidthMethod().(ansi.Method)
+	require.False(t, isAnsiMethod,
+		"test setup must hand Draw a non-ansi.Method WidthMethod")
+
+	u.chat.Draw(scr, uv.Rect(0, 0, w, h))
+
+	// The fallback path uses uv.NewStyledString(rendered).Draw — same
+	// thing freshStyledRender does — so output must match the
+	// reference render byte-for-byte.
+	want := freshStyledRender(u.chat.list.Render(), w, h)
+	require.Equal(t, want, scr.Render())
+
+	// And no cache was populated (or if it was, it wasn't used for
+	// this draw). Either way: drawing through the fallback must never
+	// leave a stale cache that future draws would reuse incorrectly.
+	require.Nil(t, u.chat.drawCache,
+		"fallback path must not populate the cache")
+}
+
+// TestRenderedBounds_MatchesPrintStringTallyForZWJ pins the invariant
+// that renderedBounds (used to size the cache buffer) returns the same
+// width as the cell tally StyledString.Draw will write into a buffer
+// using the same WidthMethod. We compare the computed width to a
+// reference render: lay the string into an oversized buffer, then count
+// non-empty cells per row. A divergent-width sample (woman-technologist
+// ZWJ sequence) is the canonical case that would expose a Bounds()
+// vs printString mismatch — under GraphemeWidth the sequence is one
+// cell, under WcWidth it's two — so the same string must produce two
+// different widths under the two methods, both matching the tally.
+func TestRenderedBounds_MatchesPrintStringTallyForZWJ(t *testing.T) {
+	t.Parallel()
+
+	const sample = "x 👩\u200d💻 y"
+
+	for _, m := range []ansi.Method{ansi.GraphemeWidth, ansi.WcWidth} {
+		w, h := renderedBounds(sample, m)
+		// Tally what printString would actually write.
+		// Use an oversized buffer so nothing gets clipped.
+		scr := uv.ScreenBuffer{
+			RenderBuffer: uv.NewRenderBuffer(64, 4),
+			Method:       m,
+		}
+		uv.NewStyledString(sample).Draw(scr, uv.Rect(0, 0, 64, 4))
+
+		// Tally width: rightmost non-empty cell + its width, per row.
+		gotW := 0
+		gotH := 0
+		for y := 0; y < 4; y++ {
+			rowW := 0
+			rowHasContent := false
+			for x := 0; x < 64; x++ {
+				cell := scr.CellAt(x, y)
+				if cell == nil || cell.IsZero() ||
+					cell.Content == " " && cell.Width == 1 &&
+						cell.Style == (uv.Style{}) {
+					continue
+				}
+				rowHasContent = true
+				if x+cell.Width > rowW {
+					rowW = x + cell.Width
+				}
+			}
+			if rowHasContent {
+				gotH = y + 1
+				if rowW > gotW {
+					gotW = rowW
+				}
+			}
+		}
+
+		require.Equal(t, gotW, w,
+			"renderedBounds width must match printString cell tally for method %v", m)
+		// Height is just '\n' count + 1 — no special-casing needed
+		// here, but pin it so future regressions on multi-line
+		// inputs don't sneak by.
+		require.Equal(t, gotH, h,
+			"renderedBounds height must match printString row tally for method %v", m)
+	}
+}
+
+// Compile-time guard: testMessageItem is reused from layout_test.go and
+// satisfies chat.MessageItem there. We rely on the same shape here.
+var _ chat.MessageItem = testMessageItem{}

From 646a5053bd58635316bf960f3d8e5d4a0996a24b Mon Sep 17 00:00:00 2001
From: Christian Rocha <christian@rocha.is>
Date: Thu, 14 May 2026 12:48:46 -0400
Subject: [PATCH 09/10] fix(ui): keep tool spinners animating during
 long-running tasks

---
 internal/ui/chat/agent.go             |  38 +++++++
 internal/ui/chat/tools.go             |  12 +++
 internal/ui/chat/version_bump_test.go | 140 ++++++++++++++++++++++++++
 3 files changed, 190 insertions(+)

diff --git a/internal/ui/chat/agent.go b/internal/ui/chat/agent.go
index abc65e7b47c684cd1a4d8a12b0a96efaa924329e..8342ebaf8345dae6caf16db6b44aad75b97e367c 100644
--- a/internal/ui/chat/agent.go
+++ b/internal/ui/chat/agent.go
@@ -53,11 +53,21 @@ func NewAgentToolMessageItem(
 }
 
 // Animate progresses the message animation if it should be spinning.
+//
+// Bumps the parent's F6 list-cache version on both the parent-tick and
+// nested-tick branches. Nested tools are not list entries of their
+// own — their IDs map to this parent's index in idInxMap
+// (internal/ui/model/chat.go:240-246) and their renders are embedded
+// inline in this parent's output — so the list only checks the
+// parent's version. Without the bump, the list cache would serve the
+// previously rendered frame indefinitely and the spinner would appear
+// frozen.
 func (a *AgentToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
 	if a.result != nil || a.Status() == ToolStatusCanceled {
 		return nil
 	}
 	if msg.ID == a.ID() {
+		a.Bump()
 		return a.anim.Animate(msg)
 	}
 	for _, nestedTool := range a.nestedTools {
@@ -65,6 +75,7 @@ func (a *AgentToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
 			continue
 		}
 		if s, ok := nestedTool.(Animatable); ok {
+			a.Bump()
 			return s.Animate(msg)
 		}
 	}
@@ -211,6 +222,33 @@ func NewAgenticFetchToolMessageItem(
 	return t
 }
 
+// Animate progresses the message animation if it should be spinning.
+// See [AgentToolMessageItem.Animate] for the parent-bump rationale —
+// without an override, the embedded base.Animate would (a) drop
+// StepMsgs whose ID matches a nested child instead of the parent
+// (anim.Animate's ID check at internal/ui/anim/anim.go:326-329
+// silently returns nil), and (b) never invalidate the parent's
+// list-cache entry on a parent tick.
+func (a *AgenticFetchToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
+	if a.result != nil || a.Status() == ToolStatusCanceled {
+		return nil
+	}
+	if msg.ID == a.ID() {
+		a.Bump()
+		return a.anim.Animate(msg)
+	}
+	for _, nestedTool := range a.nestedTools {
+		if msg.ID != nestedTool.ID() {
+			continue
+		}
+		if s, ok := nestedTool.(Animatable); ok {
+			a.Bump()
+			return s.Animate(msg)
+		}
+	}
+	return nil
+}
+
 // NestedTools returns the nested tools.
 func (a *AgenticFetchToolMessageItem) NestedTools() []ToolMessageItem {
 	return a.nestedTools
diff --git a/internal/ui/chat/tools.go b/internal/ui/chat/tools.go
index cac2e227aeab5b12c9e99c5777e0e2f810462ee3..3774e217fa362dc80057a3e92707bdeeff9d07d3 100644
--- a/internal/ui/chat/tools.go
+++ b/internal/ui/chat/tools.go
@@ -296,10 +296,22 @@ func (t *baseToolMessageItem) StartAnimation() tea.Cmd {
 }
 
 // Animate progresses the assistant message animation if it should be spinning.
+//
+// Bumps the F6 list-cache version so the next draw re-renders this
+// item: a spinner tick mutates anim's internal frame counter, which
+// changes the rendered output but is invisible to the per-item
+// caches. Without the bump the list cache would serve the previously
+// rendered frame indefinitely and the spinner would appear frozen.
+// The ID gate keeps unrelated ticks (routed here by a future change
+// to chat.Animate's dispatch) from churning the cache.
 func (t *baseToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
 	if !t.isSpinning() {
 		return nil
 	}
+	if msg.ID != t.toolCall.ID {
+		return nil
+	}
+	t.Bump()
 	return t.anim.Animate(msg)
 }
 
diff --git a/internal/ui/chat/version_bump_test.go b/internal/ui/chat/version_bump_test.go
index 847f0053bb38324e7019e35294c7e9a81a9ba7a4..47ce7c5ed8ee615fb57d73df15260b9cddab0482 100644
--- a/internal/ui/chat/version_bump_test.go
+++ b/internal/ui/chat/version_bump_test.go
@@ -400,6 +400,146 @@ func TestAgenticFetchToolMessageItem_NestedChildInPlaceMutationBumpsParent(t *te
 		"parent SetNestedTools must bump even when child pointers are unchanged")
 }
 
+// requireNoBump asserts the supplied mutator leaves the item's
+// Version() unchanged. The mutator runs once; an unexpected bump
+// would force the F6 list memo to re-render an item whose output
+// did not change, churning the cache.
+func requireNoBump(t *testing.T, name string, item versionedItem, mutate func()) {
+	t.Helper()
+	before := item.Version()
+	mutate()
+	after := item.Version()
+	require.Equalf(t, before, after,
+		"%s must not bump Version() (before=%d, after=%d)", name, before, after)
+}
+
+// TestBaseToolMessageItem_AnimateBumpsVersion is the spinner
+// regression test for non-agent tools: while the tool is spinning,
+// every anim.StepMsg whose ID matches the tool must bump Version()
+// so the list-level cache invalidates and the next draw re-renders
+// the advanced spinner frame. Foreign IDs must not bump (they would
+// churn the cache on every frame), and a finished tool must not
+// bump on any ID (the entry is frozen and stays frozen).
+func TestBaseToolMessageItem_AnimateBumpsVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	tc := message.ToolCall{ID: "tc-spin", Name: "bash", Input: "{}", Finished: false}
+	item := NewToolMessageItem(&sty, "msg", tc, nil, false)
+	v := item.(versionedItem)
+	a, ok := item.(Animatable)
+	require.True(t, ok, "base tool message item must implement Animatable")
+
+	// Spinning + matching ID → bump.
+	requireBump(t, "Animate[spinning,own ID]", v, func() {
+		a.Animate(anim.StepMsg{ID: tc.ID})
+	})
+
+	// Spinning + foreign ID → no bump. Routing this StepMsg here at
+	// all would mean a future chat.Animate refactor; the item must
+	// be defensive against it so we don't churn the list cache.
+	requireNoBump(t, "Animate[spinning,foreign ID]", v, func() {
+		a.Animate(anim.StepMsg{ID: "some-other-tool"})
+	})
+
+	// Finished → no bump on any ID. The entry is frozen; a stray
+	// bump would needlessly invalidate frozen entries.
+	tcFinished := tc
+	tcFinished.Finished = true
+	item.SetToolCall(tcFinished)
+	item.SetResult(&message.ToolResult{ToolCallID: tc.ID, Content: "ok"})
+	require.True(t, item.Finished(), "tool must report Finished() once the result lands")
+
+	requireNoBump(t, "Animate[finished,own ID]", v, func() {
+		a.Animate(anim.StepMsg{ID: tc.ID})
+	})
+	requireNoBump(t, "Animate[finished,foreign ID]", v, func() {
+		a.Animate(anim.StepMsg{ID: "some-other-tool"})
+	})
+}
+
+// TestAgentToolMessageItem_AnimateBumpsVersion is the spinner
+// regression test for agent tools. The parent must bump on both
+// the parent-tick branch (msg.ID == parent.ID()) and the
+// nested-tick branch (msg.ID == nested.ID()) because the list
+// only checks the parent's version — nested tools are not list
+// entries of their own. Unrelated IDs must not bump, and a parent
+// with a result must not bump on any ID.
+func TestAgentToolMessageItem_AnimateBumpsVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parentTC := message.ToolCall{ID: "agent-parent", Name: "agent", Input: `{}`, Finished: false}
+	parent := NewAgentToolMessageItem(&sty, parentTC, nil, false)
+
+	childTC := message.ToolCall{ID: "agent-child", Name: "bash", Input: `{}`, Finished: false}
+	child := NewToolMessageItem(&sty, "msg", childTC, nil, false)
+	parent.AddNestedTool(child)
+
+	// Spinning + parent's own ID → parent bumps.
+	requireBump(t, "Animate[spinning,parent ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: parentTC.ID})
+	})
+
+	// Spinning + nested child ID → parent bumps. The list only
+	// invalidates on the parent; without this the nested
+	// spinner's frame would never reach the screen even though
+	// the nested anim's step has advanced.
+	requireBump(t, "Animate[spinning,nested ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: childTC.ID})
+	})
+
+	// Spinning + unrelated ID → no bump.
+	requireNoBump(t, "Animate[spinning,foreign ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: "unrelated"})
+	})
+
+	// Once the parent has a result, neither branch bumps.
+	parent.SetResult(&message.ToolResult{ToolCallID: parentTC.ID, Content: "done"})
+	requireNoBump(t, "Animate[finished,parent ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: parentTC.ID})
+	})
+	requireNoBump(t, "Animate[finished,nested ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: childTC.ID})
+	})
+}
+
+// TestAgenticFetchToolMessageItem_AnimateBumpsVersion is the
+// agentic-fetch counterpart of the agent-tool Animate bump test.
+// Without an explicit override the embedded base Animate would
+// drop nested-child StepMsgs at anim.Animate's ID check and never
+// bump the parent on its own ticks; this test locks in the
+// override.
+func TestAgenticFetchToolMessageItem_AnimateBumpsVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parentTC := message.ToolCall{ID: "fetch-parent", Name: "agentic_fetch", Input: `{}`, Finished: false}
+	parent := NewAgenticFetchToolMessageItem(&sty, parentTC, nil, false)
+
+	childTC := message.ToolCall{ID: "fetch-child", Name: "fetch", Input: `{}`, Finished: false}
+	child := NewToolMessageItem(&sty, "msg", childTC, nil, false)
+	parent.AddNestedTool(child)
+
+	requireBump(t, "Animate[spinning,parent ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: parentTC.ID})
+	})
+	requireBump(t, "Animate[spinning,nested ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: childTC.ID})
+	})
+	requireNoBump(t, "Animate[spinning,foreign ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: "unrelated"})
+	})
+
+	parent.SetResult(&message.ToolResult{ToolCallID: parentTC.ID, Content: "done"})
+	requireNoBump(t, "Animate[finished,parent ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: parentTC.ID})
+	})
+	requireNoBump(t, "Animate[finished,nested ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: childTC.ID})
+	})
+}
+
 // TestBaseToolMessageItem_FinishedTransition covers §4.5.1 for
 // tools: a still-running tool reports Finished() == false; once the
 // tool call is marked finished and a result lands, Finished()

From 30f9c6ed55daf68b5ee5cc6a3f0fcebbf2559252 Mon Sep 17 00:00:00 2001
From: Christian Rocha <christian@rocha.is>
Date: Fri, 15 May 2026 14:18:53 -0400
Subject: [PATCH 10/10] chore(ui): make spinner deterministic for testing

---
 internal/ui/anim/anim.go | 54 ++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 16 deletions(-)

diff --git a/internal/ui/anim/anim.go b/internal/ui/anim/anim.go
index deddbff2f0b88681a05120b3f85debc4b1a5404e..2479e7f93052a731db84d8c7c3257f17891e6366 100644
--- a/internal/ui/anim/anim.go
+++ b/internal/ui/anim/anim.go
@@ -30,9 +30,14 @@ const (
 	// change every 8 frames (400 milliseconds).
 	ellipsisAnimSpeed = 8
 
-	// The maximum amount of time that can pass before a character appears.
-	// This is used to create a staggered entrance effect.
-	maxBirthOffset = time.Second
+	// The maximum number of animation steps that can pass before a
+	// character appears. With fps == 20 this is ~1s of staggered
+	// entrance, identical to the previous wall-clock-driven value.
+	// Switching from wall-clock + rand to a step-driven birth schedule
+	// keeps Render() deterministic: two Anim instances built from the
+	// same Settings produce byte-identical output when no Animate ticks
+	// have advanced their step counter.
+	maxBirthSteps = 20
 
 	// Number of frames to prerender for the animation. After this number
 	// of frames, the animation will loop. This only applies when color
@@ -107,12 +112,12 @@ type Anim struct {
 	label            *csync.Slice[string]
 	labelWidth       int
 	labelColor       color.Color
-	startTime        time.Time
-	birthOffsets     []time.Duration
+	birthSteps       []int
 	initialFrames    [][]string // frames for the initial characters
 	initialized      atomic.Bool
 	cyclingFrames    [][]string           // frames for the cycling characters
-	step             atomic.Int64         // current main frame step
+	step             atomic.Int64         // current main frame step (wraps)
+	framesSinceStart atomic.Int64         // total Animate ticks (does not wrap)
 	ellipsisStep     atomic.Int64         // current ellipsis frame step
 	ellipsisFrames   *csync.Slice[string] // ellipsis animation frames
 	id               string
@@ -140,7 +145,6 @@ func New(opts Settings) *Anim {
 	} else {
 		a.id = fmt.Sprintf("%d", nextID())
 	}
-	a.startTime = time.Now()
 	a.cyclingCharWidth = opts.Size
 	a.labelColor = opts.LabelColor
 
@@ -207,7 +211,13 @@ func New(opts Settings) *Anim {
 			}
 		}
 
-		// Prerender scrambled rune frames for the animation.
+		// Prerender scrambled rune frames for the animation. Seed
+		// the rune picker off the settings hash so cyclingFrames is
+		// a pure function of Settings: two processes with identical
+		// Settings populate the cache with the same glyphs, which
+		// keeps any cross-process golden-file comparison stable.
+		seed := xxh3.HashString(cacheKey)
+		rng := rand.New(rand.NewPCG(seed, ^seed))
 		a.cyclingFrames = make([][]string, numFrames)
 		offset = 0
 		for i := range a.cyclingFrames {
@@ -219,7 +229,7 @@ func New(opts Settings) *Anim {
 
 				// Also prerender the color with Lip Gloss here to avoid processing
 				// in the render loop.
-				r := availableRunes[rand.IntN(len(availableRunes))]
+				r := availableRunes[rng.IntN(len(availableRunes))]
 				a.cyclingFrames[i][j] = lipgloss.NewStyle().
 					Foreground(ramp[j+offset]).
 					Render(string(r))
@@ -249,10 +259,20 @@ func New(opts Settings) *Anim {
 		animCacheMap.Set(cacheKey, cached)
 	}
 
-	// Random assign a birth to each character for a stagged entrance effect.
-	a.birthOffsets = make([]time.Duration, a.width)
-	for i := range a.birthOffsets {
-		a.birthOffsets[i] = time.Duration(rand.N(int64(maxBirthOffset))) * time.Nanosecond
+	// Assign a deterministic birth step to each column for a
+	// staggered entrance effect. The schedule is seeded off the
+	// spinner id and the settings hash, so two spinners with the
+	// same role and identity stagger identically (this is what
+	// keeps Render() byte-equal across cache hits and across
+	// processes for the same Settings+ID) while spinners with
+	// different ids — distinct assistant messages, different tool
+	// calls, "Thinking" vs "Generating" labels — fade in with
+	// different patterns instead of marching in lock-step.
+	birthSeed := xxh3.HashString(a.id + "|" + cacheKey)
+	birthRng := rand.New(rand.NewPCG(birthSeed, ^birthSeed))
+	a.birthSteps = make([]int, a.width)
+	for i := range a.birthSteps {
+		a.birthSteps[i] = birthRng.IntN(maxBirthSteps)
 	}
 
 	return a
@@ -333,13 +353,14 @@ func (a *Anim) Animate(msg StepMsg) tea.Cmd {
 		a.step.Store(0)
 	}
 
+	frames := a.framesSinceStart.Add(1)
 	if a.initialized.Load() && a.labelWidth > 0 {
 		// Manage the ellipsis animation.
 		ellipsisStep := a.ellipsisStep.Add(1)
 		if int(ellipsisStep) >= ellipsisAnimSpeed*len(ellipsisFrames) {
 			a.ellipsisStep.Store(0)
 		}
-	} else if !a.initialized.Load() && time.Since(a.startTime) >= maxBirthOffset {
+	} else if !a.initialized.Load() && int(frames) >= maxBirthSteps {
 		a.initialized.Store(true)
 	}
 	return a.Step()
@@ -349,10 +370,11 @@ func (a *Anim) Animate(msg StepMsg) tea.Cmd {
 func (a *Anim) Render() string {
 	var b strings.Builder
 	step := int(a.step.Load())
+	frames := int(a.framesSinceStart.Load())
 	for i := range a.width {
 		switch {
-		case !a.initialized.Load() && i < len(a.birthOffsets) && time.Since(a.startTime) < a.birthOffsets[i]:
-			// Birth offset not reached: render initial character.
+		case !a.initialized.Load() && i < len(a.birthSteps) && frames < a.birthSteps[i]:
+			// Birth step not reached: render initial character.
 			b.WriteString(a.initialFrames[step][i])
 		case i < a.cyclingCharWidth:
 			// Render a cycling character.