perf(chat): cache the parts of an assistant message separately

Christian Rocha and Charm Crush created 2 months ago

Each assistant message has up to three sections — thinking, response, and
errors — and previously a new streamed token re-rendered all of them. Now
each section is cached on its own, so streaming a response no longer
re-renders the (often long) thinking block above it.

Co-Authored-By: Charm Crush <crush@charm.land>

Change summary

internal/ui/chat/assistant.go                    | 296 ++++++++++-
internal/ui/chat/assistant_section_cache_test.go | 464 ++++++++++++++++++
2 files changed, 728 insertions(+), 32 deletions(-)

Detailed changes

internal/ui/chat/assistant.go 🔗

@@ -1,7 +1,9 @@
 package chat
 
 import (
+	"encoding/binary"
 	"fmt"
+	"hash/fnv"
 	"strings"
 
 	tea "charm.land/bubbletea/v2"
@@ -20,6 +22,70 @@ const assistantMessageTruncateFormat = "… (%d lines hidden) [click or space to
 // maxCollapsedThinkingHeight defines the maximum height of the thinking
 const maxCollapsedThinkingHeight = 10
 
+// assistantSection is a per-section render cache for AssistantMessageItem.
+// Each section (thinking, content, error) carries its own keys so that
+// streaming a section does not invalidate a different — often more
+// expensive — section's cached render. srcHash is an FNV-64 of the
+// section's source text; extra captures any other state that changes
+// the rendered output (e.g. thinkingExpanded, the thinking footer
+// inputs). valid disambiguates a real cache hit from the zero value
+// when both source text and extras hash to zero. aux carries any
+// per-section side data that the caller needs to recover on a hit
+// (e.g. the thinking box height for click detection).
+type assistantSection struct {
+	width   int
+	srcHash uint64
+	extra   uint64
+	out     string
+	h       int
+	aux     int
+	valid   bool
+}
+
+// hit reports whether the cache entry matches the requested key.
+func (s *assistantSection) hit(width int, srcHash, extra uint64) bool {
+	return s.valid && s.width == width && s.srcHash == srcHash && s.extra == extra
+}
+
+// store records the rendered output under the given key.
+func (s *assistantSection) store(width int, srcHash, extra uint64, out string, aux int) {
+	s.width = width
+	s.srcHash = srcHash
+	s.extra = extra
+	s.out = out
+	s.h = lipgloss.Height(out)
+	s.aux = aux
+	s.valid = true
+}
+
+// reset drops the cached output.
+func (s *assistantSection) reset() {
+	*s = assistantSection{}
+}
+
+// fnv64 hashes a single string with FNV-64.
+func fnv64(s string) uint64 {
+	h := fnv.New64a()
+	_, _ = h.Write([]byte(s))
+	return h.Sum64()
+}
+
+// fnvFields hashes a list of byte fields with length-prefix framing
+// so that no concatenation collision can occur between distinct
+// field tuples (a NUL inside one field cannot impersonate a
+// boundary between two fields). Each field is preceded by its
+// length encoded as 8 bytes little-endian.
+func fnvFields(fields ...[]byte) uint64 {
+	h := fnv.New64a()
+	var lenBuf [8]byte
+	for _, f := range fields {
+		binary.LittleEndian.PutUint64(lenBuf[:], uint64(len(f)))
+		_, _ = h.Write(lenBuf[:])
+		_, _ = h.Write(f)
+	}
+	return h.Sum64()
+}
+
 // AssistantMessageItem represents an assistant message in the chat UI.
 //
 // This item includes thinking, and the content but does not include the tool calls.
@@ -33,6 +99,13 @@ type AssistantMessageItem struct {
 	anim              *anim.Anim
 	thinkingExpanded  bool
 	thinkingBoxHeight int // Tracks the rendered thinking box height for click detection.
+
+	// Per-section render caches. Splitting these out means content
+	// streaming does not invalidate the (often expensive) thinking
+	// render, and vice versa.
+	thinkingSec assistantSection
+	contentSec  assistantSection
+	errorSec    assistantSection
 }
 
 var _ Expandable = (*AssistantMessageItem)(nil)
@@ -88,14 +161,7 @@ func (a *AssistantMessageItem) RawRender(width int) string {
 		spinner = a.renderSpinning()
 	}
 
-	content, height, ok := a.getCachedRender(cappedWidth)
-	if !ok {
-		content = a.renderMessageContent(cappedWidth)
-		height = lipgloss.Height(content)
-		// cache the rendered content
-		a.setCachedRender(content, cappedWidth, height)
-	}
-
+	content, height := a.renderMessageContent(cappedWidth)
 	highlightedContent := a.renderHighlighted(content, cappedWidth, height)
 	if spinner != "" {
 		if highlightedContent != "" {
@@ -116,15 +182,16 @@ func (a *AssistantMessageItem) Render(width int) string {
 	// RawRender, so we can just apply the styles directly to each line.
 	//
 	// The split + per-line prefix loop is O(L); cache the result keyed
-	// by (width, focused) so steady-state Render becomes a pointer
-	// return. Bypass the cache while spinning (RawRender's spinner
-	// suffix changes every animation frame) or while a highlight range
-	// is active (selection drag).
+	// by (width, focused, sectionsFingerprint) so steady-state Render
+	// becomes a pointer return. The sectionsFingerprint folds in the
+	// per-section srcHash/extra so that any sub-cache change
+	// invalidates this prefix cache without requiring an explicit
+	// drop. Bypass the cache while spinning (RawRender's spinner
+	// suffix changes every animation frame) or while a highlight
+	// range is active (selection drag).
 	useCache := !a.isSpinning() && !a.isHighlighted()
-	var key uint64
-	if a.focused {
-		key = 1
-	}
+	cappedWidth := cappedMessageWidth(width)
+	key := a.prefixCacheKey(cappedWidth)
 	if useCache {
 		if cached, ok := a.getCachedPrefixedRender(width, key); ok {
 			return cached
@@ -148,36 +215,182 @@ func (a *AssistantMessageItem) Render(width int) string {
 	return out
 }
 
-// renderMessageContent renders the message content including thinking, main content, and finish reason.
-func (a *AssistantMessageItem) renderMessageContent(width int) string {
+// prefixCacheKey builds the F3 prefixed-render cache key. We pack the
+// focus bit into bit 0 and a fingerprint of the section caches into
+// the upper bits, so any change to a sub-section's source text or
+// extras forces the prefix cache to miss without needing an explicit
+// drop. cappedWidth is included so a cached prefix never survives a
+// section-cache miss caused by a width change. The finish reason is
+// folded in too because it controls the composition of
+// renderMessageContent (e.g. appending the constant "Canceled"
+// string) — that decision lives outside any section's own hash.
+func (a *AssistantMessageItem) prefixCacheKey(cappedWidth int) uint64 {
+	thinkSrc, thinkExtra := a.thinkingKey()
+	contentSrc, contentExtra := a.contentKey()
+	errSrc, errExtra := a.errorKey()
+	h := fnv.New64a()
+	var buf [8]byte
+	writeU64 := func(v uint64) {
+		for i := range 8 {
+			buf[i] = byte(v >> (8 * i))
+		}
+		_, _ = h.Write(buf[:])
+	}
+	writeU64(uint64(cappedWidth))
+	writeU64(thinkSrc)
+	writeU64(thinkExtra)
+	writeU64(contentSrc)
+	writeU64(contentExtra)
+	writeU64(errSrc)
+	writeU64(errExtra)
+	writeU64(a.compositionKey())
+	fingerprint := h.Sum64()
+	var focusBit uint64
+	if a.focused {
+		focusBit = 1
+	}
+	return (fingerprint &^ 1) | focusBit
+}
+
+// compositionKey hashes the inputs to renderMessageContent's structural
+// decisions (which sections to include, whether to append the
+// constant "Canceled" footer) so that flipping IsFinished or the
+// finish reason invalidates the prefix cache even when no section's
+// own source text changed.
+func (a *AssistantMessageItem) compositionKey() uint64 {
+	var finishedFlag byte
+	var reason string
+	if a.message.IsFinished() {
+		finishedFlag = 1
+		reason = string(a.message.FinishReason())
+	}
+	// Length-prefixed framing keeps the finished flag and the reason
+	// string from blending into one another.
+	return fnvFields([]byte{finishedFlag}, []byte(reason))
+}
+
+// renderMessageContent renders the message content including thinking, main
+// content, and finish reason. Each section is served from its own cache;
+// only the section whose source text or extras changed since the last
+// render is recomputed.
+func (a *AssistantMessageItem) renderMessageContent(width int) (string, int) {
 	var messageParts []string
 	thinking := strings.TrimSpace(a.message.ReasoningContent().Thinking)
 	content := strings.TrimSpace(a.message.Content().Text)
-	// if the massage has reasoning content add that first
+
 	if thinking != "" {
-		messageParts = append(messageParts, a.renderThinking(a.message.ReasoningContent().Thinking, width))
+		messageParts = append(messageParts, a.cachedThinking(width))
 	}
 
-	// then add the main content
 	if content != "" {
-		// add a spacer between thinking and content
 		if thinking != "" {
 			messageParts = append(messageParts, "")
 		}
-		messageParts = append(messageParts, a.renderMarkdown(content, width))
+		messageParts = append(messageParts, a.cachedContent(width))
 	}
 
-	// finally add any finish reason info
 	if a.message.IsFinished() {
 		switch a.message.FinishReason() {
 		case message.FinishReasonCanceled:
 			messageParts = append(messageParts, a.sty.Messages.AssistantCanceled.Render("Canceled"))
 		case message.FinishReasonError:
-			messageParts = append(messageParts, a.renderError(width))
+			messageParts = append(messageParts, a.cachedError(width))
 		}
 	}
 
-	return strings.Join(messageParts, "\n")
+	out := strings.Join(messageParts, "\n")
+	return out, lipgloss.Height(out)
+}
+
+// thinkingKey returns the (srcHash, extra) cache key components for the
+// thinking section. extra folds in everything other than the raw
+// thinking text that affects the rendered output: the expanded flag
+// and the footer state (which depends on IsThinking, ToolCalls, and
+// ThinkingDuration).
+func (a *AssistantMessageItem) thinkingKey() (uint64, uint64) {
+	thinking := a.message.ReasoningContent().Thinking
+	srcHash := fnv64(thinking)
+
+	showFooter := !a.message.IsThinking() || len(a.message.ToolCalls()) > 0
+	var durationStr string
+	if showFooter {
+		duration := a.message.ThinkingDuration()
+		if duration.String() != "0s" {
+			durationStr = duration.String()
+		}
+	}
+	var expanded byte
+	if a.thinkingExpanded {
+		expanded = 1
+	}
+	var footer byte
+	if showFooter {
+		footer = 1
+	}
+	// Length-prefixed framing avoids any delimiter collision between
+	// the flag bytes and the duration string.
+	extra := fnvFields([]byte{expanded, footer}, []byte(durationStr))
+	return srcHash, extra
+}
+
+// contentKey returns the (srcHash, extra) cache key components for the
+// main content section.
+func (a *AssistantMessageItem) contentKey() (uint64, uint64) {
+	return fnv64(a.message.Content().Text), 0
+}
+
+// errorKey returns the (srcHash, extra) cache key components for the
+// error section. Returns (0, 0) when no error is present so the cache
+// stays a no-op for non-error messages.
+func (a *AssistantMessageItem) errorKey() (uint64, uint64) {
+	if !a.message.IsFinished() || a.message.FinishReason() != message.FinishReasonError {
+		return 0, 0
+	}
+	finishPart := a.message.FinishPart()
+	if finishPart == nil {
+		return 0, 0
+	}
+	// Length-prefixed framing prevents Message+Details collisions
+	// between distinct (Message, Details) tuples that would
+	// otherwise concatenate to the same byte sequence.
+	return fnvFields([]byte(finishPart.Message), []byte(finishPart.Details)), 0
+}
+
+// cachedThinking returns the rendered thinking section, computing and
+// caching it on miss. The thinking-box height (used for click target
+// detection) is preserved across hits via assistantSection.aux so the
+// cached path never desyncs click detection.
+func (a *AssistantMessageItem) cachedThinking(width int) string {
+	srcHash, extra := a.thinkingKey()
+	if a.thinkingSec.hit(width, srcHash, extra) {
+		a.thinkingBoxHeight = a.thinkingSec.aux
+		return a.thinkingSec.out
+	}
+	out := a.renderThinking(a.message.ReasoningContent().Thinking, width)
+	a.thinkingSec.store(width, srcHash, extra, out, a.thinkingBoxHeight)
+	return out
+}
+
+// cachedContent returns the rendered content section.
+func (a *AssistantMessageItem) cachedContent(width int) string {
+	srcHash, extra := a.contentKey()
+	if a.contentSec.hit(width, srcHash, extra) {
+		return a.contentSec.out
+	}
+	out := a.renderMarkdown(a.message.Content().Text, width)
+	a.contentSec.store(width, srcHash, extra, out, 0)
+	return out
+}
+
+// cachedError returns the rendered error section.
+func (a *AssistantMessageItem) cachedError(width int) string {
+	srcHash, extra := a.errorKey()
+	if a.errorSec.hit(width, srcHash, extra) {
+		return a.errorSec.out
+	}
+	out := a.renderError(width)
+	a.errorSec.store(width, srcHash, extra, out, 0)
+	return out
 }
 
 // renderThinking renders the thinking/reasoning content with footer.
@@ -260,22 +473,41 @@ func (a *AssistantMessageItem) isSpinning() bool {
 	return (isThinking || !isFinished) && !hasContent && !hasToolCalls
 }
 
-// SetMessage is used to update the underlying message.
-func (a *AssistantMessageItem) SetMessage(message *message.Message) tea.Cmd {
+// SetMessage is used to update the underlying message. Only the
+// sub-section caches whose source text or extras changed are
+// invalidated; the others survive and serve cache hits on the next
+// RawRender.
+func (a *AssistantMessageItem) SetMessage(msg *message.Message) tea.Cmd {
 	wasSpinning := a.isSpinning()
-	a.message = message
-	a.clearCache()
+	a.message = msg
+	// The prefix cache is keyed by a fingerprint that includes every
+	// section's source hash, so an unchanged section keeps its prefix
+	// cache valid while a changed section forces a miss naturally.
+	// Section caches themselves are content-keyed, so they do not
+	// need an explicit drop here either.
 	if !wasSpinning && a.isSpinning() {
 		return a.StartAnimation()
 	}
 	return nil
 }
 
+// clearCache drops every cached render for this item, including the
+// per-section caches. Shadows the embedded cachedMessageItem.clearCache
+// so ClearItemCaches (style change) wipes the section caches too.
+func (a *AssistantMessageItem) clearCache() {
+	a.cachedMessageItem.clearCache()
+	a.thinkingSec.reset()
+	a.contentSec.reset()
+	a.errorSec.reset()
+}
+
 // ToggleExpanded toggles the expanded state of the thinking box and returns
-// whether the item is now expanded.
+// whether the item is now expanded. Both the thinking section cache and
+// the F3 prefix cache key fold in thinkingExpanded (via the section's
+// extra hash and the prefix cache fingerprint respectively), so no
+// explicit invalidation is required.
 func (a *AssistantMessageItem) ToggleExpanded() bool {
 	a.thinkingExpanded = !a.thinkingExpanded
-	a.clearCache()
 	return a.thinkingExpanded
 }

internal/ui/chat/assistant_section_cache_test.go 🔗

@@ -0,0 +1,464 @@
+package chat
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// Fixed Unix timestamps for deterministic cache-equality tests. The
+// thinking section's `extra` hash folds in ThinkingDuration, which
+// in turn depends on (FinishedAt - StartedAt). Anchoring both
+// timestamps removes any wall-clock dependency from the cache key
+// so two builds across a second boundary still hit the cache.
+const (
+	testStartedAt  int64 = 1_700_000_000
+	testFinishedAt int64 = 1_700_000_005
+	testFinishTime int64 = 1_700_000_006
+)
+
+// thinkingMessage builds an assistant message with a fixed reasoning
+// content and an optional text content. When text is empty the
+// message represents a still-thinking turn (matches IsThinking()).
+// Both reasoning timestamps are anchored to fixed Unix seconds so
+// ThinkingDuration is deterministic and cache-equality assertions
+// don't depend on wall-clock time.
+func thinkingMessage(id, thinking, text string) *message.Message {
+	parts := []message.ContentPart{
+		message.ReasoningContent{
+			Thinking:   thinking,
+			StartedAt:  testStartedAt,
+			FinishedAt: testFinishedAt,
+		},
+	}
+	if text != "" {
+		parts = append(parts, message.TextContent{Text: text})
+	}
+	return &message.Message{
+		ID:    id,
+		Role:  message.Assistant,
+		Parts: parts,
+	}
+}
+
+// errorMessage builds a finished assistant message whose finish part
+// carries an error reason plus a custom message and details.
+func errorMessage(id, errMsg, errDetails string) *message.Message {
+	return &message.Message{
+		ID:   id,
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: "partial output"},
+			message.Finish{
+				Reason:  message.FinishReasonError,
+				Message: errMsg,
+				Details: errDetails,
+				Time:    testFinishTime,
+			},
+		},
+	}
+}
+
+// renderTwoSetMessages drives a SetMessage cycle and returns the
+// section-cache identity (out string pointers via direct comparison
+// of the cached fields). The test compares `out` strings; identical
+// output across cycles is the cache-hit indicator we rely on.
+type sectionSnapshot struct {
+	thinking string
+	content  string
+	errSec   string
+}
+
+func snapshot(a *AssistantMessageItem) sectionSnapshot {
+	return sectionSnapshot{
+		thinking: a.thinkingSec.out,
+		content:  a.contentSec.out,
+		errSec:   a.errorSec.out,
+	}
+}
+
+// TestAssistantSectionCache_ContentChangeDoesNotInvalidateThinking covers
+// the central F4 invariant: streaming the main content through SetMessage
+// must keep the cached thinking render intact, provided the inputs to
+// the thinking section render (text, expanded flag, footer state) are
+// unchanged. We seed an already-non-empty content so that IsThinking()
+// is false on both renders — that's the steady streaming state where
+// the thinking block has finished and content keeps growing.
+func TestAssistantSectionCache_ContentChangeDoesNotInvalidateThinking(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	thinking := "Step 1\nStep 2\nStep 3"
+	msg := thinkingMessage("a1", thinking, "Initial answer.")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 71
+
+	_ = item.RawRender(width)
+	first := snapshot(item)
+	require.NotEmpty(t, first.thinking, "thinking section must be populated after first render")
+
+	// Stream more content into the existing turn. Thinking text and
+	// footer state are byte-identical between the two renders.
+	updated := thinkingMessage("a1", thinking, "Initial answer. More streamed text.")
+	item.SetMessage(updated)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+
+	require.Equal(t, first.thinking, second.thinking,
+		"content streaming must not invalidate the thinking section render")
+	require.NotEqual(t, first.content, second.content,
+		"content section must have been re-rendered")
+}
+
+// TestAssistantSectionCache_ThinkingChangeDoesNotInvalidateContent is the
+// mirror of the previous test: extending thinking text must not force a
+// re-render of the content section.
+func TestAssistantSectionCache_ThinkingChangeDoesNotInvalidateContent(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	content := "Final answer goes here."
+	msg := thinkingMessage("a2", "Step 1", content)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 73
+
+	_ = item.RawRender(width)
+	first := snapshot(item)
+	require.NotEmpty(t, first.content)
+
+	updated := thinkingMessage("a2", "Step 1\nStep 2", content)
+	item.SetMessage(updated)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+
+	require.Equal(t, first.content, second.content,
+		"thinking streaming must not invalidate the content section render")
+	require.NotEqual(t, first.thinking, second.thinking,
+		"thinking text changed; thinking section must have re-rendered")
+}
+
+// TestAssistantSectionCache_HashKeyDiscrimination asserts that two
+// messages with different source text hash to different per-section
+// keys, and that messages with identical source text hit the cache.
+func TestAssistantSectionCache_HashKeyDiscrimination(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	msgA := thinkingMessage("a3", "thinking A", "content A")
+	msgB := thinkingMessage("a3", "thinking B", "content B")
+
+	itemA := NewAssistantMessageItem(&sty, msgA).(*AssistantMessageItem)
+	itemB := NewAssistantMessageItem(&sty, msgB).(*AssistantMessageItem)
+
+	thinkSrcA, _ := itemA.thinkingKey()
+	thinkSrcB, _ := itemB.thinkingKey()
+	require.NotEqual(t, thinkSrcA, thinkSrcB,
+		"distinct thinking text must produce distinct FNV-64 source hashes")
+
+	contentSrcA, _ := itemA.contentKey()
+	contentSrcB, _ := itemB.contentKey()
+	require.NotEqual(t, contentSrcA, contentSrcB,
+		"distinct content text must produce distinct FNV-64 source hashes")
+
+	// Identical source text on a fresh item must produce the same
+	// hashes — keying invariant for cache hits.
+	itemAClone := NewAssistantMessageItem(&sty, thinkingMessage("a3", "thinking A", "content A")).(*AssistantMessageItem)
+	thinkSrcAClone, _ := itemAClone.thinkingKey()
+	contentSrcAClone, _ := itemAClone.contentKey()
+	require.Equal(t, thinkSrcA, thinkSrcAClone)
+	require.Equal(t, contentSrcA, contentSrcAClone)
+}
+
+// TestAssistantSectionCache_CloneRoundTrip guards the contract that
+// message.Clone() does not invalidate any section cache: re-keying off
+// the cloned message must produce identical hashes and the section
+// caches must serve byte-identical renders.
+func TestAssistantSectionCache_CloneRoundTrip(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	orig := thinkingMessage("a4", "Reasoning step.", "Answer text.")
+	item := NewAssistantMessageItem(&sty, orig).(*AssistantMessageItem)
+
+	const width = 75
+	_ = item.RawRender(width)
+	first := snapshot(item)
+
+	cloned := orig.Clone()
+	item.SetMessage(&cloned)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+
+	require.Equal(t, first.thinking, second.thinking, "clone must hit the thinking cache")
+	require.Equal(t, first.content, second.content, "clone must hit the content cache")
+}
+
+// TestAssistantSectionCache_ResizeInvalidatesAll asserts that a width
+// change forces a re-render of every section.
+func TestAssistantSectionCache_ResizeInvalidatesAll(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	msg := errorMessage("a5", "boom", strings.Repeat("detail line\n", 5))
+	// errorMessage returns FinishReasonError; combine with thinking
+	// content so all three sections are exercised.
+	msg.Parts = append([]message.ContentPart{
+		message.ReasoningContent{
+			Thinking:   "Considering options.",
+			StartedAt:  testStartedAt,
+			FinishedAt: testFinishedAt,
+		},
+	}, msg.Parts...)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	_ = item.RawRender(77)
+	first := snapshot(item)
+	require.NotEmpty(t, first.thinking)
+	require.NotEmpty(t, first.content)
+	require.NotEmpty(t, first.errSec)
+
+	_ = item.RawRender(117)
+	second := snapshot(item)
+
+	require.NotEqual(t, first.thinking, second.thinking, "resize must re-render the thinking section")
+	require.NotEqual(t, first.content, second.content, "resize must re-render the content section")
+	require.NotEqual(t, first.errSec, second.errSec, "resize must re-render the error section")
+}
+
+// TestAssistantSectionCache_ErrorIndependentOfThinkingAndContent guards
+// that the error section caches independently. Editing the error
+// message must not invalidate the other two sections, and editing the
+// content must not invalidate the error section.
+func TestAssistantSectionCache_ErrorIndependentOfThinkingAndContent(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	build := func(thinking, content, errMsg, errDetails string) *message.Message {
+		return &message.Message{
+			ID:   "a6",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.ReasoningContent{
+					Thinking:   thinking,
+					StartedAt:  testStartedAt,
+					FinishedAt: testFinishedAt,
+				},
+				message.TextContent{Text: content},
+				message.Finish{
+					Reason:  message.FinishReasonError,
+					Message: errMsg,
+					Details: errDetails,
+					Time:    testFinishTime,
+				},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build("think", "content", "boom", "details")).(*AssistantMessageItem)
+	_ = item.RawRender(79)
+	first := snapshot(item)
+
+	// Change only the error text. Thinking and content caches must
+	// survive; error cache must miss and re-render.
+	item.SetMessage(build("think", "content", "different boom", "different details"))
+	_ = item.RawRender(79)
+	second := snapshot(item)
+
+	require.Equal(t, first.thinking, second.thinking, "error change must not invalidate thinking")
+	require.Equal(t, first.content, second.content, "error change must not invalidate content")
+	require.NotEqual(t, first.errSec, second.errSec, "error change must re-render the error section")
+
+	// Now change only the content; error cache must survive.
+	item.SetMessage(build("think", "different content", "different boom", "different details"))
+	_ = item.RawRender(79)
+	third := snapshot(item)
+
+	require.Equal(t, second.thinking, third.thinking)
+	require.NotEqual(t, second.content, third.content)
+	require.Equal(t, second.errSec, third.errSec, "content change must not invalidate the error section")
+}
+
+// TestAssistantSectionCache_PrefixCacheRespectsSectionChanges guards
+// the F3/F4 boundary: the prefix cache must invalidate when any
+// underlying section changes. We verify by comparing the F3-cached
+// Render output across SetMessage cycles.
+func TestAssistantSectionCache_PrefixCacheRespectsSectionChanges(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	build := func(content string) *message.Message {
+		return &message.Message{
+			ID:   "a7",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.TextContent{Text: content},
+				message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build("first content")).(*AssistantMessageItem)
+	item.SetFocused(true)
+
+	const width = 81
+	first := item.Render(width)
+
+	item.SetMessage(build("second content"))
+	second := item.Render(width)
+	require.NotEqual(t, first, second,
+		"prefix cache must invalidate when the content section changes")
+
+	// Re-set to the original content; the prefix cache should
+	// produce identical output again.
+	item.SetMessage(build("first content"))
+	third := item.Render(width)
+	require.Equal(t, first, third)
+}
+
+// TestAssistantSectionCache_ByteIdenticalToFreshRender asserts that the
+// F4 cached path produces the same bytes as a fresh-instance render of
+// the equivalent message — i.e. caching is invisible from the outside.
+// Drives a sequence of mutations (thinking change, content change,
+// finish) and compares every step against an independent item rendered
+// from scratch.
+func TestAssistantSectionCache_ByteIdenticalToFreshRender(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	const width = 83
+
+	type step struct {
+		name string
+		msg  *message.Message
+	}
+	startedAt := testStartedAt
+	finishedAt := testFinishedAt
+	finishTime := testFinishTime
+	steps := []step{
+		{
+			name: "thinking-only",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning", StartedAt: startedAt},
+				},
+			},
+		},
+		{
+			name: "thinking-grew",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt},
+				},
+			},
+		},
+		{
+			name: "content-arrived",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt, FinishedAt: finishedAt},
+					message.TextContent{Text: "the answer"},
+				},
+			},
+		},
+		{
+			name: "finished-end-turn",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt, FinishedAt: finishedAt},
+					message.TextContent{Text: "the answer"},
+					message.Finish{Reason: message.FinishReasonEndTurn, Time: finishTime},
+				},
+			},
+		},
+	}
+
+	first := steps[0].msg.Clone()
+	cached := NewAssistantMessageItem(&sty, &first).(*AssistantMessageItem)
+	for _, s := range steps {
+		cached.SetMessage(s.msg)
+		freshMsg := s.msg.Clone()
+		fresh := NewAssistantMessageItem(&sty, &freshMsg).(*AssistantMessageItem)
+		require.Equal(t, fresh.RawRender(width), cached.RawRender(width),
+			"step %q: cached path must match fresh render byte-for-byte", s.name)
+	}
+}
+
+// TestAssistantSectionCache_PrefixCacheInvalidatesOnCompositionOnlyChange
+// guards the F3 prefix cache against composition-only changes:
+// flipping the finish reason from EndTurn to Canceled appends a
+// constant "Canceled" line via renderMessageContent, but no
+// section's own source text changes. The prefix cache must observe
+// the difference (compositionKey is folded into prefixCacheKey) and
+// the resulting bytes must differ. As a second guarantee, a fresh
+// item built with the same final state must produce byte-equal
+// output to the cached item — caching must never produce stale or
+// divergent renders.
+func TestAssistantSectionCache_PrefixCacheInvalidatesOnCompositionOnlyChange(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	const width = 87
+
+	build := func(reason message.FinishReason) *message.Message {
+		return &message.Message{
+			ID:   "comp",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.TextContent{Text: "hi"},
+				message.Finish{Reason: reason, Time: testFinishTime},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build(message.FinishReasonEndTurn)).(*AssistantMessageItem)
+	endTurnOut := item.Render(width)
+
+	// Flip only the finish reason. Thinking is empty and content
+	// text is unchanged, so no section's source hash moves; only
+	// compositionKey shifts. The prefix cache must miss.
+	item.SetMessage(build(message.FinishReasonCanceled))
+	canceledOut := item.Render(width)
+	require.NotEqual(t, endTurnOut, canceledOut,
+		"prefix cache must invalidate on composition-only change (finish reason)")
+
+	// A fresh item built with the same final state must match the
+	// cached item byte-for-byte — caching is invisible from the
+	// outside and never serves stale output.
+	fresh := NewAssistantMessageItem(&sty, build(message.FinishReasonCanceled)).(*AssistantMessageItem)
+	require.Equal(t, fresh.Render(width), canceledOut,
+		"cached output must equal a fresh render of the same final state")
+}
+
+// TestAssistantSectionCache_ThinkingBoxHeightSurvivesCacheHit guards
+// click-detection geometry across thinking-section cache hits. The
+// thinking box height feeds HandleMouseClick; it is recomputed
+// inside renderThinking and must be restored from
+// assistantSection.aux when the thinking cache hits. We render once
+// to capture the original height, trigger a content-only change so
+// thinkingKey stays identical (thinking text, expanded flag, and
+// footer state all unchanged), render again, and assert the
+// thinkingBoxHeight field is preserved.
+func TestAssistantSectionCache_ThinkingBoxHeightSurvivesCacheHit(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	const width = 71
+
+	thinking := strings.Join([]string{
+		"Considering the request.",
+		"Looking at the relevant files.",
+		"Drafting a plan.",
+		"Verifying constraints.",
+	}, "\n")
+	msg := thinkingMessage("hbox", thinking, "initial answer")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+	item.thinkingExpanded = true
+
+	_ = item.RawRender(width)
+	originalHeight := item.thinkingBoxHeight
+	require.Greater(t, originalHeight, 0,
+		"thinking box height must be populated after first render")
+
+	// Stomp the field so a stale read (cache hit that fails to
+	// restore aux) is detectable. Then trigger a content-only
+	// change: thinkingKey is byte-identical between renders, so
+	// the thinking section cache must hit and restore the
+	// preserved height via assistantSection.aux.
+	item.thinkingBoxHeight = -1
+	updated := thinkingMessage("hbox", thinking, "initial answer with more streamed text")
+	item.SetMessage(updated)
+	_ = item.RawRender(width)
+
+	require.Equal(t, originalHeight, item.thinkingBoxHeight,
+		"thinkingBoxHeight must be preserved across thinking section cache hits "+
+			"so HandleMouseClick keeps targeting the right rows")
+}