assistant_thinking_window_test.go

  1package chat
  2
  3import (
  4	"strings"
  5	"testing"
  6
  7	"charm.land/lipgloss/v2"
  8	"git.secluded.site/crush/internal/message"
  9	"git.secluded.site/crush/internal/ui/styles"
 10	"github.com/charmbracelet/x/ansi"
 11	"github.com/stretchr/testify/require"
 12)
 13
 14// thinkingMessageWithLines builds a still-thinking assistant message
 15// whose reasoning content is `count` short paragraphs separated by
 16// blank lines. The blank-line separation is what matters: glamour
 17// renders paragraph blocks one-per-line in the output (with a
 18// trailing blank line between paragraphs) instead of reflowing the
 19// entire input into one big wrapped paragraph. That gives us a
 20// post-glamour line count we can drive past the tail-window
 21// threshold deterministically. Each paragraph is tagged with its
 22// (1-based) index so the test can identify head vs tail in the
 23// rendered output.
 24//
 25// The message has no text content and no Finish part, so
 26// IsThinking() returns true and the render path skips the
 27// "Thought for" footer — keeping the rendered height computation
 28// simple.
 29func thinkingMessageWithLines(id string, count int) *message.Message {
 30	var b strings.Builder
 31	for i := 1; i <= count; i++ {
 32		b.WriteString("ln")
 33		b.WriteString(itoa(i))
 34		if i < count {
 35			// Blank line between paragraphs: glamour preserves the
 36			// per-paragraph structure rather than reflowing into one
 37			// wrapped block, so totalLines tracks count predictably.
 38			b.WriteString("\n\n")
 39		}
 40	}
 41	return &message.Message{
 42		ID:   id,
 43		Role: message.Assistant,
 44		Parts: []message.ContentPart{
 45			message.ReasoningContent{
 46				Thinking:  b.String(),
 47				StartedAt: testStartedAt,
 48			},
 49		},
 50	}
 51}
 52
 53// itoa is a local stdlib-free integer formatter used only by these
 54// tests; pulling fmt in just for %d would be wasteful when the test
 55// fixtures already churn 5000+ short strings.
 56func itoa(n int) string {
 57	if n == 0 {
 58		return "0"
 59	}
 60	var buf [20]byte
 61	i := len(buf)
 62	for n > 0 {
 63		i--
 64		buf[i] = byte('0' + n%10)
 65		n /= 10
 66	}
 67	return string(buf[i:])
 68}
 69
 70// renderedThinkingHeight returns the line count of the cached
 71// thinking section render only (not the full RawRender, which also
 72// includes the content and error sections). Drives a render at
 73// `width` first to populate the cache.
 74func renderedThinkingHeight(t *testing.T, item *AssistantMessageItem, width int) int {
 75	t.Helper()
 76	_ = item.RawRender(width)
 77	require.NotEmpty(t, item.thinkingSec.out,
 78		"thinking section must be populated after RawRender")
 79	return lipgloss.Height(item.thinkingSec.out)
 80}
 81
 82// TestThinkingWindow_CollapsedCapPreserved guards that F5 did not
 83// regress the existing collapsed-mode behaviour: a 5000-line
 84// thinking block in the default (collapsed) state still renders at
 85// most a small bounded height — the last `maxCollapsedThinkingHeight`
 86// lines plus the truncation hint. The thinking message keeps
 87// IsThinking() == true, so the optional "Thought for" footer is
 88// suppressed and the section height equals the box height.
 89func TestThinkingWindow_CollapsedCapPreserved(t *testing.T) {
 90	t.Parallel()
 91
 92	sty := styles.CharmtonePantera()
 93	msg := thinkingMessageWithLines("collapsed", 5000)
 94	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
 95
 96	// Default state must be collapsed.
 97	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
 98
 99	// Unique odd width avoids sharing the glamour renderer cache with
100	// any other parallel test (the renderer instance is memoized per
101	// width and is not safe for concurrent Render calls).
102	const width = 91
103	height := renderedThinkingHeight(t, item, width)
104
105	// Collapsed mode keeps the existing cap: last 10 lines + a
106	// 2-line hint prefix (hint + blank). Allow a small slack for
107	// any future style-driven padding so the test is robust to
108	// cosmetic tweaks while still being orders of magnitude below
109	// the 5000-line source.
110	const collapsedUpperBound = maxCollapsedThinkingHeight + 5
111	require.LessOrEqual(t, height, collapsedUpperBound,
112		"collapsed mode must remain bounded by the small cap; got %d", height)
113}
114
115// TestThinkingWindow_ExpandedShortSkipsTailWindow guards that a
116// short thinking block (well under the tail-window cap) still
117// toggles directly to full expansion without an intermediate
118// tail-window step and shows no affordance footer. The cycle is
119// collapsed -> full -> collapsed for short blocks; tail-window is
120// only inserted when it would actually elide content.
121func TestThinkingWindow_ExpandedShortSkipsTailWindow(t *testing.T) {
122	t.Parallel()
123
124	sty := styles.CharmtonePantera()
125	const lines = 50
126	require.Less(t, lines, maxExpandedThinkingTailLines,
127		"this test relies on the source being well under the tail cap")
128	msg := thinkingMessageWithLines("short", lines)
129	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
130
131	require.True(t, item.ToggleExpanded(),
132		"first toggle should report expanded")
133	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode,
134		"short blocks must skip tail-window and go straight to full expansion")
135
136	const width = 93
137	_ = item.RawRender(width)
138	out := item.thinkingSec.out
139	plain := ansi.Strip(out)
140
141	require.NotContains(t, plain, "earlier lines hidden",
142		"short blocks must not show the tail-window affordance")
143	require.NotContains(t, plain, "lines hidden",
144		"short expanded blocks must not show any truncation hint")
145	require.Contains(t, plain, "ln1 ",
146		"a fully expanded short block must include the very first source paragraph")
147	require.Contains(t, plain, "ln50 ",
148		"a fully expanded short block must include the last source paragraph")
149}
150
151// TestThinkingWindow_TailWindowed asserts the central F5 behaviour:
152// expanding a long thinking block produces a tail window of size
153// `maxExpandedThinkingTailLines` plus the affordance footer, with
154// the LAST source line present (i.e. we tailed, not headed) and
155// earlier lines elided.
156//
157// Beyond presence/absence of sentinels, this test verifies a true
158// `tail -K` relationship between the tail-windowed render and the
159// fully-expanded render of the same source at the same width: the
160// last K plain-ANSI lines of the windowed render must byte-equal
161// the last K lines of the unwindowed render.
162//
163// K is sized below the cap to absorb the affordance prefix (hint +
164// blank line) and any small framing differences introduced by the
165// bordered ThinkingBox. The cap minus 5 leaves a comfortable margin
166// for padding/footer rows while still asserting that the bulk of
167// the rendered tail is identical.
168func TestThinkingWindow_TailWindowed(t *testing.T) {
169	t.Parallel()
170
171	sty := styles.CharmtonePantera()
172	const total = 5000
173	const width = 95
174
175	// Tail-windowed render.
176	tailMsg := thinkingMessageWithLines("tail", total)
177	tailItem := NewAssistantMessageItem(&sty, tailMsg).(*AssistantMessageItem)
178	require.True(t, tailItem.ToggleExpanded(), "first toggle should report expanded")
179	require.Equal(t, thinkingTailWindow, tailItem.thinkingViewMode,
180		"a long block must enter tail-window after the first toggle")
181
182	height := renderedThinkingHeight(t, tailItem, width)
183
184	// The visible window is N tail lines plus an affordance line
185	// and a blank-line spacer (matching the existing collapsed-mode
186	// hint structure). Allow a small slack for style-driven
187	// padding.
188	const expectedFloor = maxExpandedThinkingTailLines + 1
189	const expectedCeil = maxExpandedThinkingTailLines + 5
190	require.GreaterOrEqual(t, height, expectedFloor,
191		"tail-window must include at least N + affordance lines; got %d", height)
192	require.LessOrEqual(t, height, expectedCeil,
193		"tail-window must not exceed N + a small padding budget; got %d", height)
194
195	tailPlain := ansi.Strip(tailItem.thinkingSec.out)
196
197	require.Contains(t, tailPlain, "earlier lines hidden",
198		"tail-windowed render must include the affordance footer")
199	require.Contains(t, tailPlain, "ln5000",
200		"tail-windowed render must include the LAST source paragraph — we tailed, not headed")
201	require.NotContains(t, tailPlain, "ln1 ",
202		"tail-windowed render must elide early source paragraphs")
203
204	// Independent reference render: same source, same width, full
205	// expansion (no tail slice). The tail-windowed output's last K
206	// lines must byte-equal the unwindowed output's last K lines.
207	fullMsg := thinkingMessageWithLines("tail-full-ref", total)
208	fullItem := NewAssistantMessageItem(&sty, fullMsg).(*AssistantMessageItem)
209	fullItem.thinkingViewMode = thinkingFullExpanded
210	_ = fullItem.RawRender(width)
211	fullPlain := ansi.Strip(fullItem.thinkingSec.out)
212
213	tailLines := strings.Split(tailPlain, "\n")
214	fullLines := strings.Split(fullPlain, "\n")
215
216	// K is the cap minus a small budget that covers the affordance
217	// prefix (hint line + blank line) and any framing differences
218	// the bordered ThinkingBox style may introduce around the
219	// edges. Documented inline because going much larger lets the
220	// affordance row leak into the comparison; going much smaller
221	// dilutes the assertion.
222	const K = maxExpandedThinkingTailLines - 5
223	require.GreaterOrEqual(t, len(tailLines), K,
224		"tail render must contain at least K lines; got %d", len(tailLines))
225	require.GreaterOrEqual(t, len(fullLines), K,
226		"full render must contain at least K lines; got %d", len(fullLines))
227
228	tailTail := tailLines[len(tailLines)-K:]
229	fullTail := fullLines[len(fullLines)-K:]
230	require.Equal(t, fullTail, tailTail,
231		"tail-windowed render's last %d lines must byte-equal the unwindowed render's last %d lines (true tail -K relationship)",
232		K, K)
233}
234
235// TestThinkingWindow_PromoteToFull verifies the cycle continues from
236// tail-window to full expansion: the second toggle drops the
237// affordance, removes the tail slice, and produces a render that
238// matches a fresh item rendered directly in the full-expanded
239// state.
240func TestThinkingWindow_PromoteToFull(t *testing.T) {
241	t.Parallel()
242
243	sty := styles.CharmtonePantera()
244	const total = 1500
245	msg := thinkingMessageWithLines("promote", total)
246	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
247
248	const width = 97
249
250	require.True(t, item.ToggleExpanded())
251	require.Equal(t, thinkingTailWindow, item.thinkingViewMode)
252	_ = item.RawRender(width)
253	tailOut := item.thinkingSec.out
254	require.Contains(t, ansi.Strip(tailOut), "earlier lines hidden")
255
256	require.True(t, item.ToggleExpanded(), "second toggle stays expanded (full)")
257	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode)
258	_ = item.RawRender(width)
259	fullOut := item.thinkingSec.out
260	fullPlain := ansi.Strip(fullOut)
261
262	require.NotContains(t, fullPlain, "earlier lines hidden",
263		"full expansion must drop the tail-window affordance")
264	require.Contains(t, fullPlain, "ln1 ",
265		"full expansion must include the first source paragraph")
266	require.Contains(t, fullPlain, "ln1500 ",
267		"full expansion must include the last source paragraph")
268
269	// Independent reference: a fresh item, rendered straight into
270	// the full-expanded state, must produce byte-equal output.
271	freshMsg := thinkingMessageWithLines("promote-fresh", total)
272	fresh := NewAssistantMessageItem(&sty, freshMsg).(*AssistantMessageItem)
273	fresh.thinkingViewMode = thinkingFullExpanded
274	_ = fresh.RawRender(width)
275	require.Equal(t, fresh.thinkingSec.out, fullOut,
276		"cached full-expanded output must match a fresh full-expanded render")
277
278	// And the cycle closes back to collapsed.
279	require.False(t, item.ToggleExpanded(), "third toggle must report collapsed")
280	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
281}
282
// sectionKey captures the (width, srcHash, extra) tuple that
// identifies a cache hit for an assistantSection. Tests compare this
// tuple before and after a mutation because it is a stronger check
// than comparing rendered bytes: identical bytes could also result
// from an invalidation followed by a re-render of the same input,
// whereas an unchanged tuple shows the lookup key itself never moved.
type sectionKey struct {
	width          int
	srcHash, extra uint64
}
294
295func keyOf(s assistantSection) sectionKey {
296	return sectionKey{width: s.width, srcHash: s.srcHash, extra: s.extra}
297}
298
299// TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow
300// guards the F4/F5 boundary: streaming the main content while the
301// thinking block sits in tail-window mode must NOT invalidate the
302// thinking section cache. Tail-window state is folded into
303// thinkingKey()'s extra hash, so changing only the content text
304// keeps thinking's (srcHash, extra) tuple identical and the cache
305// hits.
306//
307// The assertion is on the cache key tuple, not just rendered bytes:
308// equal output could in principle survive a re-render with
309// identical inputs, but identical (width, srcHash, extra) tuples
310// across the SetMessage cycle prove the thinking cache was never
311// invalidated to begin with. The mirror tuple on the content
312// section MUST move (the source text changed), or the test isn't
313// exercising what it claims to.
314func TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow(t *testing.T) {
315	t.Parallel()
316
317	sty := styles.CharmtonePantera()
318	const total = 1000
319
320	build := func(content string) *message.Message {
321		var b strings.Builder
322		for i := 1; i <= total; i++ {
323			b.WriteString("ln")
324			b.WriteString(itoa(i))
325			if i < total {
326				b.WriteString("\n\n")
327			}
328		}
329		parts := []message.ContentPart{
330			message.ReasoningContent{
331				Thinking:   b.String(),
332				StartedAt:  testStartedAt,
333				FinishedAt: testFinishedAt,
334			},
335		}
336		if content != "" {
337			parts = append(parts, message.TextContent{Text: content})
338		}
339		return &message.Message{ID: "tail-stream", Role: message.Assistant, Parts: parts}
340	}
341
342	item := NewAssistantMessageItem(&sty, build("first answer")).(*AssistantMessageItem)
343	item.thinkingViewMode = thinkingTailWindow
344
345	const width = 99
346	_ = item.RawRender(width)
347	first := snapshot(item)
348	firstThinkingKey := keyOf(item.thinkingSec)
349	firstContentKey := keyOf(item.contentSec)
350	require.NotEmpty(t, first.thinking)
351
352	item.SetMessage(build("first answer with more streaming text"))
353	_ = item.RawRender(width)
354	second := snapshot(item)
355	secondThinkingKey := keyOf(item.thinkingSec)
356	secondContentKey := keyOf(item.contentSec)
357
358	require.Equal(t, firstThinkingKey, secondThinkingKey,
359		"thinking section's (width, srcHash, extra) tuple must not move "+
360			"across a content-only update — proves the cache key never invalidated")
361	require.Equal(t, first.thinking, second.thinking,
362		"content streaming must not invalidate the tail-windowed thinking cache")
363	require.NotEqual(t, firstContentKey, secondContentKey,
364		"content section's tuple MUST move; otherwise this test isn't exercising a real content change")
365	require.NotEqual(t, first.content, second.content,
366		"content section must have re-rendered")
367}
368
369// TestThinkingWindow_ToggleInvalidatesOnlyThinking verifies that
370// cycling thinkingViewMode invalidates the thinking section cache
371// alone — content and error caches survive across the toggle.
372//
373// Like TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow,
374// the assertion is on the cache key tuple (width, srcHash, extra)
375// at each section, not just on rendered bytes:
376//   - thinking's tuple MUST move (extra folds in thinkingViewMode)
377//   - content's and error's tuples MUST NOT move (their keys depend
378//     only on their own source text, untouched by the toggle).
379func TestThinkingWindow_ToggleInvalidatesOnlyThinking(t *testing.T) {
380	t.Parallel()
381
382	sty := styles.CharmtonePantera()
383	const total = 1500
384	build := func() *message.Message {
385		var b strings.Builder
386		for i := 1; i <= total; i++ {
387			b.WriteString("ln")
388			b.WriteString(itoa(i))
389			if i < total {
390				b.WriteString("\n\n")
391			}
392		}
393		return &message.Message{
394			ID:   "toggle-iso",
395			Role: message.Assistant,
396			Parts: []message.ContentPart{
397				message.ReasoningContent{
398					Thinking:   b.String(),
399					StartedAt:  testStartedAt,
400					FinishedAt: testFinishedAt,
401				},
402				message.TextContent{Text: "answer text"},
403				message.Finish{
404					Reason:  message.FinishReasonError,
405					Message: "boom",
406					Details: "details",
407					Time:    testFinishTime,
408				},
409			},
410		}
411	}
412
413	item := NewAssistantMessageItem(&sty, build()).(*AssistantMessageItem)
414
415	const width = 101
416	_ = item.RawRender(width)
417	first := snapshot(item)
418	firstThink := keyOf(item.thinkingSec)
419	firstContent := keyOf(item.contentSec)
420	firstErr := keyOf(item.errorSec)
421	require.NotEmpty(t, first.thinking)
422	require.NotEmpty(t, first.content)
423	require.NotEmpty(t, first.errSec)
424
425	// Cycle: collapsed -> tail-window. Only thinking should change.
426	require.True(t, item.ToggleExpanded())
427	require.Equal(t, thinkingTailWindow, item.thinkingViewMode)
428	_ = item.RawRender(width)
429	second := snapshot(item)
430	secondThink := keyOf(item.thinkingSec)
431	secondContent := keyOf(item.contentSec)
432	secondErr := keyOf(item.errorSec)
433
434	require.NotEqual(t, firstThink, secondThink,
435		"thinking section's tuple MUST move on toggle (extra folds in thinkingViewMode)")
436	require.Equal(t, firstContent, secondContent,
437		"content section's tuple must not move on a thinking toggle")
438	require.Equal(t, firstErr, secondErr,
439		"error section's tuple must not move on a thinking toggle")
440	require.NotEqual(t, first.thinking, second.thinking,
441		"toggling into tail-window must re-render the thinking section")
442	require.Equal(t, first.content, second.content,
443		"toggling thinking view-mode must not invalidate the content section")
444	require.Equal(t, first.errSec, second.errSec,
445		"toggling thinking view-mode must not invalidate the error section")
446
447	// Cycle: tail-window -> full. Same expectation.
448	require.True(t, item.ToggleExpanded())
449	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode)
450	_ = item.RawRender(width)
451	third := snapshot(item)
452	thirdThink := keyOf(item.thinkingSec)
453	thirdContent := keyOf(item.contentSec)
454	thirdErr := keyOf(item.errorSec)
455
456	require.NotEqual(t, secondThink, thirdThink,
457		"thinking section's tuple MUST move on the second toggle as well")
458	require.Equal(t, secondContent, thirdContent,
459		"content section's tuple must remain stable across the second toggle")
460	require.Equal(t, secondErr, thirdErr,
461		"error section's tuple must remain stable across the second toggle")
462	require.NotEqual(t, second.thinking, third.thinking,
463		"toggling into full expansion must re-render the thinking section")
464	require.Equal(t, second.content, third.content)
465	require.Equal(t, second.errSec, third.errSec)
466}
467
468// TestThinkingWindow_BoxHeightTracksWindow asserts that
469// thinkingBoxHeight reflects the WINDOWED render's height in
470// tail-window mode, not the (much larger) full thinking height.
471// This is what HandleMouseClick uses to detect whether a click
472// landed on the thinking box, so getting it wrong would make
473// click detection extend off the bottom of the visible box.
474func TestThinkingWindow_BoxHeightTracksWindow(t *testing.T) {
475	t.Parallel()
476
477	sty := styles.CharmtonePantera()
478	const total = 5000
479	msg := thinkingMessageWithLines("box-height", total)
480	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
481
482	const width = 103
483
484	// Tail-window: height should be roughly the cap.
485	item.thinkingViewMode = thinkingTailWindow
486	_ = item.RawRender(width)
487	tailHeight := item.thinkingBoxHeight
488	require.Greater(t, tailHeight, 0)
489	require.LessOrEqual(t, tailHeight, maxExpandedThinkingTailLines+5,
490		"tail-window box height must reflect the windowed render, not the full thinking height; got %d",
491		tailHeight)
492
493	// Full expansion: height should grow well past the tail cap.
494	item.thinkingViewMode = thinkingFullExpanded
495	_ = item.RawRender(width)
496	fullHeight := item.thinkingBoxHeight
497	require.Greater(t, fullHeight, maxExpandedThinkingTailLines*2,
498		"full expansion box height must reflect the full thinking render; got %d",
499		fullHeight)
500}