assistant.go

  1package chat
  2
  3import (
  4	"encoding/binary"
  5	"fmt"
  6	"hash/fnv"
  7	"strings"
  8
  9	tea "charm.land/bubbletea/v2"
 10	"charm.land/lipgloss/v2"
 11	"github.com/charmbracelet/crush/internal/message"
 12	"github.com/charmbracelet/crush/internal/ui/anim"
 13	"github.com/charmbracelet/crush/internal/ui/common"
 14	"github.com/charmbracelet/crush/internal/ui/styles"
 15	"github.com/charmbracelet/x/ansi"
 16)
 17
 18// assistantMessageTruncateFormat is the text shown when an assistant message is
 19// truncated in the collapsed state.
 20const assistantMessageTruncateFormat = "… (%d lines hidden) [click or space to expand]"
 21
 22// assistantMessageTailWindowFormat is shown above a tail-windowed thinking
 23// block to advertise that earlier lines exist and that the user can
 24// promote the view to a full expansion. The promotion is wired through
 25// the existing ToggleExpanded path (click / space) — F5 deliberately
 26// does not add a new keybinding.
 27const assistantMessageTailWindowFormat = "… %d earlier lines hidden [click or space for full view]"
 28
 29// maxCollapsedThinkingHeight defines the maximum height of the thinking
 30const maxCollapsedThinkingHeight = 10
 31
 32// maxExpandedThinkingTailLines is the F5 tail-window cap. When the user
 33// expands a thinking block whose post-glamour line count exceeds this
 34// threshold, only the last N lines are shown with an affordance line
 35// indicating how many earlier lines are hidden. Clicking / pressing
 36// space again promotes the view to a full expansion. The slice is
 37// taken AFTER glamour render (not before) so fenced code blocks,
 38// lists, and tables are not torn at arbitrary boundaries.
 39const maxExpandedThinkingTailLines = 200
 40
// thinkingViewMode is the F5 three-state view machine for the thinking
// block. ToggleExpanded cycles
// collapsed → tail-window → full-expanded → collapsed, skipping the
// tail-window step when tailWindowWouldTruncate judges the thinking
// short enough, so short blocks still toggle in two clicks.
type thinkingViewMode uint8

const (
	// thinkingCollapsed is the zero value, so a newly constructed item
	// starts collapsed. At most maxCollapsedThinkingHeight rendered
	// lines are shown.
	thinkingCollapsed thinkingViewMode = iota
	// thinkingTailWindow shows at most the last
	// maxExpandedThinkingTailLines rendered lines.
	thinkingTailWindow
	// thinkingFullExpanded shows every rendered line.
	thinkingFullExpanded
)
 53
 54// assistantSection is a per-section render cache for AssistantMessageItem.
 55// Each section (thinking, content, error) carries its own keys so that
 56// streaming a section does not invalidate a different — often more
 57// expensive — section's cached render. srcHash is an FNV-64 of the
 58// section's source text; extra captures any other state that changes
 59// the rendered output (e.g. thinkingExpanded, the thinking footer
 60// inputs). valid disambiguates a real cache hit from the zero value
 61// when both source text and extras hash to zero. aux carries any
 62// per-section side data that the caller needs to recover on a hit
 63// (e.g. the thinking box height for click detection).
 64type assistantSection struct {
 65	width   int
 66	srcHash uint64
 67	extra   uint64
 68	out     string
 69	h       int
 70	aux     int
 71	valid   bool
 72}
 73
 74// hit reports whether the cache entry matches the requested key.
 75func (s *assistantSection) hit(width int, srcHash, extra uint64) bool {
 76	return s.valid && s.width == width && s.srcHash == srcHash && s.extra == extra
 77}
 78
 79// store records the rendered output under the given key.
 80func (s *assistantSection) store(width int, srcHash, extra uint64, out string, aux int) {
 81	s.width = width
 82	s.srcHash = srcHash
 83	s.extra = extra
 84	s.out = out
 85	s.h = lipgloss.Height(out)
 86	s.aux = aux
 87	s.valid = true
 88}
 89
 90// reset drops the cached output.
 91func (s *assistantSection) reset() {
 92	*s = assistantSection{}
 93}
 94
 95// fnv64 hashes a single string with FNV-64.
 96func fnv64(s string) uint64 {
 97	h := fnv.New64a()
 98	_, _ = h.Write([]byte(s))
 99	return h.Sum64()
100}
101
// fnvFields returns the FNV-1a 64-bit hash of a tuple of byte fields.
// Every field is framed by its length, written as 8 little-endian bytes
// ahead of the field's own bytes. Thanks to the framing, two distinct
// tuples can never feed the hasher the same byte stream: no byte inside
// a field (a NUL, say) can pass for a boundary between fields.
func fnvFields(fields ...[]byte) uint64 {
	hasher := fnv.New64a()
	var prefix [8]byte
	for i := range fields {
		field := fields[i]
		binary.LittleEndian.PutUint64(prefix[:], uint64(len(field)))
		// hash.Hash documents that Write never returns an error.
		_, _ = hasher.Write(prefix[:])
		_, _ = hasher.Write(field)
	}
	return hasher.Sum64()
}
117
118// AssistantMessageItem represents an assistant message in the chat UI.
119//
120// This item includes thinking, and the content but does not include the tool calls.
121type AssistantMessageItem struct {
122	*highlightableMessageItem
123	*cachedMessageItem
124	*focusableMessageItem
125
126	message           *message.Message
127	sty               *styles.Styles
128	anim              *anim.Anim
129	thinkingViewMode  thinkingViewMode
130	thinkingBoxHeight int // Tracks the rendered thinking box height for click detection.
131
132	// Per-section render caches. Splitting these out means content
133	// streaming does not invalidate the (often expensive) thinking
134	// render, and vice versa.
135	thinkingSec assistantSection
136	contentSec  assistantSection
137	errorSec    assistantSection
138}
139
140var _ Expandable = (*AssistantMessageItem)(nil)
141
142// NewAssistantMessageItem creates a new AssistantMessageItem.
143func NewAssistantMessageItem(sty *styles.Styles, message *message.Message) MessageItem {
144	a := &AssistantMessageItem{
145		highlightableMessageItem: defaultHighlighter(sty),
146		cachedMessageItem:        &cachedMessageItem{},
147		focusableMessageItem:     &focusableMessageItem{},
148		message:                  message,
149		sty:                      sty,
150	}
151
152	a.anim = anim.New(anim.Settings{
153		ID:          a.ID(),
154		Size:        15,
155		GradColorA:  sty.WorkingGradFromColor,
156		GradColorB:  sty.WorkingGradToColor,
157		LabelColor:  sty.WorkingLabelColor,
158		CycleColors: true,
159	})
160	return a
161}
162
163// StartAnimation starts the assistant message animation if it should be spinning.
164func (a *AssistantMessageItem) StartAnimation() tea.Cmd {
165	if !a.isSpinning() {
166		return nil
167	}
168	return a.anim.Start()
169}
170
171// Animate progresses the assistant message animation if it should be spinning.
172func (a *AssistantMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
173	if !a.isSpinning() {
174		return nil
175	}
176	return a.anim.Animate(msg)
177}
178
179// ID implements MessageItem.
180func (a *AssistantMessageItem) ID() string {
181	return a.message.ID
182}
183
184// RawRender implements [MessageItem].
185func (a *AssistantMessageItem) RawRender(width int) string {
186	cappedWidth := cappedMessageWidth(width)
187
188	var spinner string
189	if a.isSpinning() {
190		spinner = a.renderSpinning()
191	}
192
193	content, height := a.renderMessageContent(cappedWidth)
194	highlightedContent := a.renderHighlighted(content, cappedWidth, height)
195	if spinner != "" {
196		if highlightedContent != "" {
197			highlightedContent += "\n\n"
198		}
199		return highlightedContent + spinner
200	}
201
202	return highlightedContent
203}
204
205// Render implements MessageItem.
206func (a *AssistantMessageItem) Render(width int) string {
207	// XXX: Here, we're manually applying the focused/blurred styles because
208	// using lipgloss.Render can degrade performance for long messages due to
209	// it's wrapping logic.
210	// We already know that the content is wrapped to the correct width in
211	// RawRender, so we can just apply the styles directly to each line.
212	//
213	// The split + per-line prefix loop is O(L); cache the result keyed
214	// by (width, focused, sectionsFingerprint) so steady-state Render
215	// becomes a pointer return. The sectionsFingerprint folds in the
216	// per-section srcHash/extra so that any sub-cache change
217	// invalidates this prefix cache without requiring an explicit
218	// drop. Bypass the cache while spinning (RawRender's spinner
219	// suffix changes every animation frame) or while a highlight
220	// range is active (selection drag).
221	useCache := !a.isSpinning() && !a.isHighlighted()
222	cappedWidth := cappedMessageWidth(width)
223	key := a.prefixCacheKey(cappedWidth)
224	if useCache {
225		if cached, ok := a.getCachedPrefixedRender(width, key); ok {
226			return cached
227		}
228	}
229	focused := a.sty.Messages.AssistantFocused.Render()
230	blurred := a.sty.Messages.AssistantBlurred.Render()
231	rendered := a.RawRender(width)
232	lines := strings.Split(rendered, "\n")
233	for i, line := range lines {
234		if a.focused {
235			lines[i] = focused + line
236		} else {
237			lines[i] = blurred + line
238		}
239	}
240	out := strings.Join(lines, "\n")
241	if useCache {
242		a.setCachedPrefixedRender(out, width, key)
243	}
244	return out
245}
246
247// prefixCacheKey builds the F3 prefixed-render cache key. We pack the
248// focus bit into bit 0 and a fingerprint of the section caches into
249// the upper bits, so any change to a sub-section's source text or
250// extras forces the prefix cache to miss without needing an explicit
251// drop. cappedWidth is included so a cached prefix never survives a
252// section-cache miss caused by a width change. The finish reason is
253// folded in too because it controls the composition of
254// renderMessageContent (e.g. appending the constant "Canceled"
255// string) — that decision lives outside any section's own hash.
256func (a *AssistantMessageItem) prefixCacheKey(cappedWidth int) uint64 {
257	thinkSrc, thinkExtra := a.thinkingKey()
258	contentSrc, contentExtra := a.contentKey()
259	errSrc, errExtra := a.errorKey()
260	h := fnv.New64a()
261	var buf [8]byte
262	writeU64 := func(v uint64) {
263		for i := range 8 {
264			buf[i] = byte(v >> (8 * i))
265		}
266		_, _ = h.Write(buf[:])
267	}
268	writeU64(uint64(cappedWidth))
269	writeU64(thinkSrc)
270	writeU64(thinkExtra)
271	writeU64(contentSrc)
272	writeU64(contentExtra)
273	writeU64(errSrc)
274	writeU64(errExtra)
275	writeU64(a.compositionKey())
276	fingerprint := h.Sum64()
277	var focusBit uint64
278	if a.focused {
279		focusBit = 1
280	}
281	return (fingerprint &^ 1) | focusBit
282}
283
284// compositionKey hashes the inputs to renderMessageContent's structural
285// decisions (which sections to include, whether to append the
286// constant "Canceled" footer) so that flipping IsFinished or the
287// finish reason invalidates the prefix cache even when no section's
288// own source text changed.
289func (a *AssistantMessageItem) compositionKey() uint64 {
290	var finishedFlag byte
291	var reason string
292	if a.message.IsFinished() {
293		finishedFlag = 1
294		reason = string(a.message.FinishReason())
295	}
296	// Length-prefixed framing keeps the finished flag and the reason
297	// string from blending into one another.
298	return fnvFields([]byte{finishedFlag}, []byte(reason))
299}
300
301// renderMessageContent renders the message content including thinking, main
302// content, and finish reason. Each section is served from its own cache;
303// only the section whose source text or extras changed since the last
304// render is recomputed.
305func (a *AssistantMessageItem) renderMessageContent(width int) (string, int) {
306	var messageParts []string
307	thinking := strings.TrimSpace(a.message.ReasoningContent().Thinking)
308	content := strings.TrimSpace(a.message.Content().Text)
309
310	if thinking != "" {
311		messageParts = append(messageParts, a.cachedThinking(width))
312	}
313
314	if content != "" {
315		if thinking != "" {
316			messageParts = append(messageParts, "")
317		}
318		messageParts = append(messageParts, a.cachedContent(width))
319	}
320
321	if a.message.IsFinished() {
322		switch a.message.FinishReason() {
323		case message.FinishReasonCanceled:
324			messageParts = append(messageParts, a.sty.Messages.AssistantCanceled.Render("Canceled"))
325		case message.FinishReasonError:
326			messageParts = append(messageParts, a.cachedError(width))
327		}
328	}
329
330	out := strings.Join(messageParts, "\n")
331	return out, lipgloss.Height(out)
332}
333
334// thinkingKey returns the (srcHash, extra) cache key components for the
335// thinking section. extra folds in everything other than the raw
336// thinking text that affects the rendered output: the view mode
337// (collapsed / tail-window / full) and the footer state (which
338// depends on IsThinking, ToolCalls, and ThinkingDuration).
339func (a *AssistantMessageItem) thinkingKey() (uint64, uint64) {
340	thinking := a.message.ReasoningContent().Thinking
341	srcHash := fnv64(thinking)
342
343	showFooter := !a.message.IsThinking() || len(a.message.ToolCalls()) > 0
344	var durationStr string
345	if showFooter {
346		duration := a.message.ThinkingDuration()
347		if duration.String() != "0s" {
348			durationStr = duration.String()
349		}
350	}
351	var footer byte
352	if showFooter {
353		footer = 1
354	}
355	// Length-prefixed framing avoids any delimiter collision between
356	// the flag bytes and the duration string. The view mode is folded
357	// in so that toggling collapsed ↔ tail-window ↔ full invalidates
358	// only the thinking section, not content/error.
359	extra := fnvFields([]byte{byte(a.thinkingViewMode), footer}, []byte(durationStr))
360	return srcHash, extra
361}
362
363// contentKey returns the (srcHash, extra) cache key components for the
364// main content section.
365func (a *AssistantMessageItem) contentKey() (uint64, uint64) {
366	return fnv64(a.message.Content().Text), 0
367}
368
369// errorKey returns the (srcHash, extra) cache key components for the
370// error section. Returns (0, 0) when no error is present so the cache
371// stays a no-op for non-error messages.
372func (a *AssistantMessageItem) errorKey() (uint64, uint64) {
373	if !a.message.IsFinished() || a.message.FinishReason() != message.FinishReasonError {
374		return 0, 0
375	}
376	finishPart := a.message.FinishPart()
377	if finishPart == nil {
378		return 0, 0
379	}
380	// Length-prefixed framing prevents Message+Details collisions
381	// between distinct (Message, Details) tuples that would
382	// otherwise concatenate to the same byte sequence.
383	return fnvFields([]byte(finishPart.Message), []byte(finishPart.Details)), 0
384}
385
386// cachedThinking returns the rendered thinking section, computing and
387// caching it on miss. The thinking-box height (used for click target
388// detection) is preserved across hits via assistantSection.aux so the
389// cached path never desyncs click detection.
390func (a *AssistantMessageItem) cachedThinking(width int) string {
391	srcHash, extra := a.thinkingKey()
392	if a.thinkingSec.hit(width, srcHash, extra) {
393		a.thinkingBoxHeight = a.thinkingSec.aux
394		return a.thinkingSec.out
395	}
396	out := a.renderThinking(a.message.ReasoningContent().Thinking, width)
397	a.thinkingSec.store(width, srcHash, extra, out, a.thinkingBoxHeight)
398	return out
399}
400
401// cachedContent returns the rendered content section.
402func (a *AssistantMessageItem) cachedContent(width int) string {
403	srcHash, extra := a.contentKey()
404	if a.contentSec.hit(width, srcHash, extra) {
405		return a.contentSec.out
406	}
407	out := a.renderMarkdown(a.message.Content().Text, width)
408	a.contentSec.store(width, srcHash, extra, out, 0)
409	return out
410}
411
412// cachedError returns the rendered error section.
413func (a *AssistantMessageItem) cachedError(width int) string {
414	srcHash, extra := a.errorKey()
415	if a.errorSec.hit(width, srcHash, extra) {
416		return a.errorSec.out
417	}
418	out := a.renderError(width)
419	a.errorSec.store(width, srcHash, extra, out, 0)
420	return out
421}
422
423// renderThinking renders the thinking/reasoning content with footer.
424//
425// Slicing happens AFTER glamour rendering so fenced code blocks, list
426// continuations, and tables are not split mid-block — the same
427// boundary problem §4.4 of the design note flags. The bordered
428// ThinkingBox style is applied on top of the (already-windowed)
429// lines so the visual box matches what the user sees today.
430func (a *AssistantMessageItem) renderThinking(thinking string, width int) string {
431	renderer := common.QuietMarkdownRenderer(a.sty, width)
432	rendered, err := renderer.Render(thinking)
433	if err != nil {
434		rendered = thinking
435	}
436	rendered = strings.TrimSpace(rendered)
437
438	lines := strings.Split(rendered, "\n")
439	totalLines := len(lines)
440
441	switch a.thinkingViewMode {
442	case thinkingCollapsed:
443		if totalLines > maxCollapsedThinkingHeight {
444			lines = lines[totalLines-maxCollapsedThinkingHeight:]
445			hint := a.sty.Messages.ThinkingTruncationHint.Render(
446				fmt.Sprintf(assistantMessageTruncateFormat, totalLines-maxCollapsedThinkingHeight),
447			)
448			lines = append([]string{hint, ""}, lines...)
449		}
450	case thinkingTailWindow:
451		if totalLines > maxExpandedThinkingTailLines {
452			lines = lines[totalLines-maxExpandedThinkingTailLines:]
453			hint := a.sty.Messages.ThinkingTruncationHint.Render(
454				fmt.Sprintf(assistantMessageTailWindowFormat, totalLines-maxExpandedThinkingTailLines),
455			)
456			lines = append([]string{hint, ""}, lines...)
457		}
458	}
459
460	thinkingStyle := a.sty.Messages.ThinkingBox.Width(width)
461	result := thinkingStyle.Render(strings.Join(lines, "\n"))
462	a.thinkingBoxHeight = lipgloss.Height(result)
463
464	var footer string
465	// if thinking is done add the thought for footer
466	if !a.message.IsThinking() || len(a.message.ToolCalls()) > 0 {
467		duration := a.message.ThinkingDuration()
468		if duration.String() != "0s" {
469			footer = a.sty.Messages.ThinkingFooterTitle.Render("Thought for ") +
470				a.sty.Messages.ThinkingFooterDuration.Render(duration.String())
471		}
472	}
473
474	if footer != "" {
475		result += "\n\n" + footer
476	}
477
478	return result
479}
480
481// renderMarkdown renders content as markdown.
482func (a *AssistantMessageItem) renderMarkdown(content string, width int) string {
483	renderer := common.MarkdownRenderer(a.sty, width)
484	result, err := renderer.Render(content)
485	if err != nil {
486		return content
487	}
488	return strings.TrimSuffix(result, "\n")
489}
490
491func (a *AssistantMessageItem) renderSpinning() string {
492	if a.message.IsThinking() {
493		a.anim.SetLabel("Thinking")
494	} else if a.message.IsSummaryMessage {
495		a.anim.SetLabel("Summarizing")
496	}
497	return a.anim.Render()
498}
499
500// renderError renders an error message.
501func (a *AssistantMessageItem) renderError(width int) string {
502	finishPart := a.message.FinishPart()
503	errTag := a.sty.Messages.ErrorTag.Render("ERROR")
504	truncated := ansi.Truncate(finishPart.Message, width-2-lipgloss.Width(errTag), "...")
505	title := fmt.Sprintf("%s %s", errTag, a.sty.Messages.ErrorTitle.Render(truncated))
506	details := a.sty.Messages.ErrorDetails.Width(width - 2).Render(finishPart.Details)
507	return fmt.Sprintf("%s\n\n%s", title, details)
508}
509
510// isSpinning returns true if the assistant message is still generating.
511func (a *AssistantMessageItem) isSpinning() bool {
512	isThinking := a.message.IsThinking()
513	isFinished := a.message.IsFinished()
514	hasContent := strings.TrimSpace(a.message.Content().Text) != ""
515	hasToolCalls := len(a.message.ToolCalls()) > 0
516	return (isThinking || !isFinished) && !hasContent && !hasToolCalls
517}
518
519// SetMessage is used to update the underlying message. Only the
520// sub-section caches whose source text or extras changed are
521// invalidated; the others survive and serve cache hits on the next
522// RawRender.
523func (a *AssistantMessageItem) SetMessage(msg *message.Message) tea.Cmd {
524	wasSpinning := a.isSpinning()
525	a.message = msg
526	// The prefix cache is keyed by a fingerprint that includes every
527	// section's source hash, so an unchanged section keeps its prefix
528	// cache valid while a changed section forces a miss naturally.
529	// Section caches themselves are content-keyed, so they do not
530	// need an explicit drop here either.
531	if !wasSpinning && a.isSpinning() {
532		return a.StartAnimation()
533	}
534	return nil
535}
536
537// clearCache drops every cached render for this item, including the
538// per-section caches. Shadows the embedded cachedMessageItem.clearCache
539// so ClearItemCaches (style change) wipes the section caches too.
540func (a *AssistantMessageItem) clearCache() {
541	a.cachedMessageItem.clearCache()
542	a.thinkingSec.reset()
543	a.contentSec.reset()
544	a.errorSec.reset()
545}
546
547// ToggleExpanded advances the F5 thinking view-mode cycle and returns
548// whether the item is now in any expanded state (tail-window or full).
549// The cycle is collapsed → tail-window → full → collapsed, with the
550// tail-window step skipped when the rendered thinking fits within
551// maxExpandedThinkingTailLines so short blocks remain a two-click
552// toggle. Both the thinking section cache and the F3 prefix cache
553// fold thinkingViewMode into their keys, so no explicit invalidation
554// is required here.
555//
556// When the message carries no thinking text the toggle is a no-op:
557// there is nothing to expand, and mutating the view mode would
558// thrash the thinking-section cache key for no visible benefit.
559func (a *AssistantMessageItem) ToggleExpanded() bool {
560	if strings.TrimSpace(a.message.ReasoningContent().Thinking) == "" {
561		return a.thinkingViewMode != thinkingCollapsed
562	}
563	switch a.thinkingViewMode {
564	case thinkingCollapsed:
565		if a.tailWindowWouldTruncate() {
566			a.thinkingViewMode = thinkingTailWindow
567		} else {
568			a.thinkingViewMode = thinkingFullExpanded
569		}
570	case thinkingTailWindow:
571		a.thinkingViewMode = thinkingFullExpanded
572	case thinkingFullExpanded:
573		a.thinkingViewMode = thinkingCollapsed
574	}
575	return a.thinkingViewMode != thinkingCollapsed
576}
577
578// tailWindowWouldTruncate reports whether the current thinking text
579// is long enough that the tail-window step is worth inserting into
580// the toggle cycle. We use a cheap source-text logical-line count
581// as the heuristic rather than peeking into the cache: the cache
582// may be populated in collapsed state (where its height is bounded
583// by maxCollapsedThinkingHeight and tells us nothing about the
584// underlying length), and re-running glamour just to count lines
585// would defeat the cache. The heuristic can over-trigger (a source
586// with many short lines may wrap to fewer than N lines), in which
587// case the tail-window render is visually identical to full and
588// the cycle costs the user one extra toggle — preferred over the
589// alternative of failing to show the affordance on a genuinely
590// long block.
591//
592// Logical line count is `1 + newlineCount` (a string with no
593// newlines is one line). Comparing newline count alone introduced
594// an off-by-one that let a source whose post-newline-split length
595// equalled the cap skip the tail-window step.
596func (a *AssistantMessageItem) tailWindowWouldTruncate() bool {
597	lineCount := 1 + strings.Count(a.message.ReasoningContent().Thinking, "\n")
598	return lineCount > maxExpandedThinkingTailLines
599}
600
601// HandleMouseClick implements MouseClickable. It signals (via a true return)
602// that the click lies on the thinking box so the caller can invoke
603// [AssistantMessageItem.ToggleExpanded] through the generic [Expandable]
604// path. Toggling here directly would double-toggle because the caller always
605// runs the generic path after a handled click.
606func (a *AssistantMessageItem) HandleMouseClick(btn ansi.MouseButton, x, y int) bool {
607	if btn != ansi.MouseLeft {
608		return false
609	}
610	// Only the thinking box is clickable; other regions of the assistant
611	// message should not trigger expansion.
612	return a.thinkingBoxHeight > 0 && y < a.thinkingBoxHeight
613}
614
615// HandleKeyEvent implements KeyEventHandler.
616func (a *AssistantMessageItem) HandleKeyEvent(key tea.KeyMsg) (bool, tea.Cmd) {
617	if k := key.String(); k == "c" || k == "y" {
618		text := a.message.Content().Text
619		return true, common.CopyToClipboard(text, "Message copied to clipboard")
620	}
621	return false, nil
622}