diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 66062270c669ffaba298980ff997c6e2c2e04c2e..ba1215e9a22ca425f745e6e00c85908737d50a0e 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -245,6 +245,15 @@ func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (*fantasy defer cancel() defer a.activeRequests.Del(call.SessionID) + // Drain any debounced message updates before returning. message.Service + // already flushes synchronously on terminal updates, but a defer here + // guarantees the contract at every Run exit (success, error, panic + // recovery upstream) without callers needing to know. + defer func() { + if flushErr := a.messages.FlushAll(ctx); flushErr != nil { + slog.Error("Failed to flush pending message updates after run", "error", flushErr) + } + }() history, files := a.preparePrompt(msgs, largeModel.CatwalkCfg.SupportsImages, call.Attachments...) @@ -653,6 +662,11 @@ func (a *sessionAgent) Summarize(ctx context.Context, sessionID string, opts fan a.activeRequests.Set(sessionID, cancel) defer a.activeRequests.Del(sessionID) defer cancel() + defer func() { + if flushErr := a.messages.FlushAll(ctx); flushErr != nil { + slog.Error("Failed to flush pending message updates after summarize", "error", flushErr) + } + }() agent := fantasy.NewAgent(largeModel.Model, fantasy.WithSystemPrompt(string(summaryPrompt)), diff --git a/internal/app/app.go b/internal/app/app.go index b28b91851402892e8bb3aba0c0a007a5f1e9485d..f627cc928fa62abaa51ae1a7f92351ee4afaad7e 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -584,13 +584,23 @@ func (app *App) Shutdown() { app.AgentCoordinator.CancelAll() } - // Now run remaining cleanup tasks in parallel. - var wg sync.WaitGroup - // Shared shutdown context for all timeout-bounded cleanup. shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() + // Drain any debounced message updates before the DB-close cleanup + // runs in the parallel block below. message.Service buffers + // streaming deltas (see internal/message/message.go) and we must + // land them while the connection is still open. + if app.Messages != nil { + if err := app.Messages.FlushAll(shutdownCtx); err != nil { + slog.Error("Failed to flush pending message updates on shutdown", "error", err) + } + } + + // Now run remaining cleanup tasks in parallel. + var wg sync.WaitGroup + // Send exit event wg.Go(func() { event.AppExited() diff --git a/internal/backend/session.go b/internal/backend/session.go index 10e21ed8932ccbc990a525785166517cd231595c..9542282319f766e384d9ffe7942e9c235ae63e29 100644 --- a/internal/backend/session.go +++ b/internal/backend/session.go @@ -72,6 +72,12 @@ func (b *Backend) ListSessionMessages(ctx context.Context, workspaceID, sessionI return nil, err } + // Drain debounced updates so HTTP clients (and the TUI on session + // switch) observe the latest in-memory state rather than racing the + // debounce timer in message.Service. + if err := ws.Messages.FlushAll(ctx); err != nil { + return nil, err + } return ws.Messages.List(ctx, sessionID) } diff --git a/internal/message/message.go b/internal/message/message.go index 6da8827b72227602dc36c39b6a2254aba18d2b0d..9e8d258c543d9f9aed31518b848439456b5e4336 100644 --- a/internal/message/message.go +++ b/internal/message/message.go @@ -5,6 +5,7 @@ import ( "database/sql" "encoding/json" "fmt" + "sync" "time" "github.com/charmbracelet/crush/internal/db" @@ -12,6 +13,13 @@ import ( "github.com/google/uuid" ) +// defaultUpdateDebounce is the default debounce window for [Service.Update]. +// Streaming deltas that arrive within the window are coalesced into a +// single SQL write and a single pubsub event. Terminal updates +// (finish/error/cancel/tool-call structural changes) bypass the +// debounce and flush synchronously. +const defaultUpdateDebounce = 33 * time.Millisecond + type CreateMessageParams struct { Role MessageRole Parts []ContentPart @@ -20,6 +28,21 @@ type CreateMessageParams struct { IsSummaryMessage bool } +// Service is the public interface to the message store. +// +// [Service.Update] is eventually consistent: it accepts new state into +// an in-memory buffer and writes it to SQLite plus publishes a +// [pubsub.UpdatedEvent] on the next debounce tick (default +// [defaultUpdateDebounce]) or on the next terminal-state update, +// whichever comes first. Terminal-state updates — those that finish +// the message, add or finish a tool call, or end a reasoning section — +// flush synchronously before [Service.Update] returns. +// +// Callers that need stronger ordering (e.g. tests, shutdown, +// session-switch reads) must use [Service.Flush] or [Service.FlushAll] +// before reading via [Service.Get] / [Service.List]. Without an +// explicit flush, a read can race the debounce timer and miss the +// most recent in-memory state. type Service interface { pubsub.Subscriber[Message] Create(ctx context.Context, sessionID string, params CreateMessageParams) (Message, error) @@ -30,20 +53,87 @@ type Service interface { ListAllUserMessages(ctx context.Context) ([]Message, error) Delete(ctx context.Context, id string) error DeleteSessionMessages(ctx context.Context, sessionID string) error + + // Flush synchronously drains any pending debounced state for the + // given message ID, performs the SQL write, and publishes the + // resulting [pubsub.UpdatedEvent]. Idempotent; cheap no-op if no + // updates are pending. Use this before any read that must observe + // the latest [Service.Update]. + Flush(ctx context.Context, id string) error + + // FlushAll synchronously drains pending debounced state for every + // message known to the service. Intended for shutdown and + // session-switch paths. + FlushAll(ctx context.Context) error +} + +// pendingState holds the in-memory coalescing buffer for a single +// message ID. All fields except where noted are guarded by +// service.mu. The flushing flag serializes concurrent flushers for +// the same ID so SQL writes never reorder. +type pendingState struct { + // latest is the most recent [Message] passed to [Service.Update] + // that has not yet been flushed. + latest Message + + // dirty is true when latest contains state that has not been + // written to SQL since the last successful flush. + dirty bool + + // flushing is true while a goroutine is performing the SQL write + // for this ID. New updates are still accepted (and re-mark dirty) + // but other flushers must back off. + flushing bool + + // timer is the active debounce timer, or nil if no flush is + // scheduled. Stopped and reset when a terminal update preempts + // the debounce window. + timer *time.Timer + + // lastFlushed is the snapshot most recently written to SQL. Used + // as the baseline for terminal-state detection. + lastFlushed Message + + // hasFlushed is false until the first successful write for this + // ID; until then lastFlushed is the zero value and must not be + // treated as a real prior state. + hasFlushed bool } type service struct { *pubsub.Broker[Message] - q db.Querier + q db.Querier + debounce time.Duration + + mu sync.Mutex + pending map[string]*pendingState } -func NewService(q db.Querier) Service { - return &service{ - Broker: pubsub.NewBroker[Message](), - q: q, +// ServiceOption configures a [Service] at construction. +type ServiceOption func(*service) + +// WithDebounce overrides the debounce window for [Service.Update]. A +// zero or negative value disables debouncing entirely (every update +// flushes synchronously). Intended primarily for tests. +func WithDebounce(d time.Duration) ServiceOption { + return func(s *service) { + s.debounce = d } } +func NewService(q db.Querier, opts ...ServiceOption) Service { + s := &service{ + Broker: pubsub.NewBroker[Message](), + q: q, + debounce: defaultUpdateDebounce, + pending: make(map[string]*pendingState), + } + for _, opt := range opts { + opt(s) + } + return s +} + func (s *service) Delete(ctx context.Context, id string) error { message, err := s.Get(ctx, id) if err != nil { @@ -53,6 +143,16 @@ func (s *service) Delete(ctx context.Context, id string) error { if err != nil { return err } + // Drop any pending coalesced state for this ID. We never want to + // flush back over a deleted row. + s.mu.Lock() + if p, ok := s.pending[id]; ok { + if p.timer != nil { + p.timer.Stop() + } + delete(s.pending, id) + } + s.mu.Unlock() // Clone the message before publishing to avoid race conditions with // concurrent modifications to the Parts slice. s.Publish(pubsub.DeletedEvent, message.Clone()) @@ -111,31 +211,240 @@ func (s *service) DeleteSessionMessages(ctx context.Context, sessionID string) e return nil } -func (s *service) Update(ctx context.Context, message Message) error { - parts, err := marshalParts(message.Parts) +// Update accepts a new state for a message and either flushes +// synchronously (terminal updates, debounce <= 0) or buffers it until +// the next debounce tick. See [Service] for the contract. +func (s *service) Update(ctx context.Context, msg Message) error { + cloned := msg.Clone() + + // Zero or negative debounce: flush every update synchronously. This + // preserves the pre-coalescing behaviour for tests and any caller + // that explicitly opted out via [WithDebounce]. + if s.debounce <= 0 { + s.mu.Lock() + p, ok := s.pending[msg.ID] + if !ok { + p = &pendingState{} + s.pending[msg.ID] = p + } + p.latest = cloned + p.dirty = true + s.mu.Unlock() + return s.flushOne(ctx, msg.ID, true) + } + + s.mu.Lock() + p, ok := s.pending[msg.ID] + if !ok { + p = &pendingState{} + s.pending[msg.ID] = p + } + p.latest = cloned + p.dirty = true + + var prev *Message + if p.hasFlushed { + prev = &p.lastFlushed + } + terminal := shouldFlushNow(prev, &cloned) + + if terminal { + if p.timer != nil { + p.timer.Stop() + p.timer = nil + } + s.mu.Unlock() + return s.flushOne(ctx, msg.ID, true) + } + + // Debounce: schedule a single flush per pending state. If a flush + // is already running we let it finish; the trailing dirty bit will + // be picked up by the next Update or by Flush. + if p.timer == nil && !p.flushing { + id := msg.ID + p.timer = time.AfterFunc(s.debounce, func() { + // Detached from caller ctx so a cancelled stream context + // does not strand the buffered write. + _ = s.flushOne(context.Background(), id, false) + }) + } + s.mu.Unlock() + return nil +} + +// Flush implements [Service.Flush]. +func (s *service) Flush(ctx context.Context, id string) error { + return s.flushOne(ctx, id, true) +} + +// FlushAll implements [Service.FlushAll]. It snapshots every ID with +// outstanding work — either dirty buffered state or a flush already in +// flight — then drains each one. Picking up in-flight IDs ensures +// FlushAll cannot return while a timer-fired write is still mid-SQL, +// which is what shutdown and session-switch callers rely on. +func (s *service) FlushAll(ctx context.Context) error { + s.mu.Lock() + ids := make([]string, 0, len(s.pending)) + for id, p := range s.pending { + if p.dirty || p.flushing { + ids = append(ids, id) + } + } + s.mu.Unlock() + var firstErr error + for _, id := range ids { + if err := s.flushOne(ctx, id, true); err != nil && firstErr == nil { + firstErr = err + } + } + return firstErr +} + +// flushOne drains a single message ID. When syncCaller is true the +// caller is willing to wait through a concurrent in-flight flush so +// that, on return, lastFlushed equals latest at the moment of return. +// When false (timer-fired path) we bail if another flusher is already +// running; that flusher will pick up the trailing dirty bit. +// +// Order matters: a sync caller must wait for any in-flight flush to +// drain even when the buffer is currently clean — that in-flight +// write has not yet updated the SQL row, so returning early would +// violate the contract that on success lastFlushed reflects the most +// recent state. +func (s *service) flushOne(ctx context.Context, id string, syncCaller bool) error { + for { + s.mu.Lock() + p, ok := s.pending[id] + if !ok { + s.mu.Unlock() + return nil + } + if p.flushing { + if !syncCaller { + s.mu.Unlock() + return nil + } + s.mu.Unlock() + // Brief yield; in-flight write should land in <1ms typical. + time.Sleep(time.Millisecond) + continue + } + if !p.dirty { + s.mu.Unlock() + return nil + } + + if p.timer != nil { + p.timer.Stop() + p.timer = nil + } + snap := p.latest + // Decide whether this snapshot represents a terminal event + // against the prior baseline. We must do this before resetting + // dirty/flushing because shouldFlushNow looks at p.lastFlushed + // (which is what was on disk before this write). + var prev *Message + if p.hasFlushed { + prev = &p.lastFlushed + } + isTerminal := shouldFlushNow(prev, &snap) + p.flushing = true + p.dirty = false + s.mu.Unlock() + + err := s.write(ctx, snap) + + s.mu.Lock() + p.flushing = false + if err == nil { + p.lastFlushed = snap + p.hasFlushed = true + } else { + // Restore dirty so the next caller retries. + p.dirty = true + } + // If a delta arrived during the SQL write and we are a sync + // caller, the user expects that delta to land too. + wasDirty := p.dirty + s.mu.Unlock() + + if err != nil { + return err + } + + // Terminal events — message finished, tool call added or + // finished, reasoning ended — use the bounded must-deliver + // path so they never get dropped under channel contention. + if isTerminal { + s.PublishMustDeliver(ctx, pubsub.UpdatedEvent, snap) + } else { + s.Publish(pubsub.UpdatedEvent, snap) + } + + if wasDirty && syncCaller { + continue + } + return nil + } +} + +// write performs the unguarded SQL write + UpdatedAt stamp. Caller +// owns publishing. +func (s *service) write(ctx context.Context, msg Message) error { + parts, err := marshalParts(msg.Parts) if err != nil { return err } finishedAt := sql.NullInt64{} - if f := message.FinishPart(); f != nil { + if f := msg.FinishPart(); f != nil { finishedAt.Int64 = f.Time finishedAt.Valid = true } - err = s.q.UpdateMessage(ctx, db.UpdateMessageParams{ - ID: message.ID, + if err := s.q.UpdateMessage(ctx, db.UpdateMessageParams{ + ID: msg.ID, Parts: string(parts), FinishedAt: finishedAt, - }) - if err != nil { + }); err != nil { return err } - message.UpdatedAt = time.Now().Unix() - // Clone the message before publishing to avoid race conditions with - // concurrent modifications to the Parts slice. - s.Publish(pubsub.UpdatedEvent, message.Clone()) return nil } +// shouldFlushNow returns true when next represents a structural +// change that must not be silently coalesced: the message just +// finished, the tool-call set grew, a tool call transitioned to +// finished, or reasoning just finished. prev is the last-flushed +// snapshot (or nil if no write has landed yet). +func shouldFlushNow(prev, next *Message) bool { + if next.IsFinished() { + return true + } + + var prevCalls []ToolCall + var prevReasoningFinishedAt int64 + if prev != nil { + prevCalls = prev.ToolCalls() + prevReasoningFinishedAt = prev.ReasoningContent().FinishedAt + } + nextCalls := next.ToolCalls() + if len(nextCalls) != len(prevCalls) { + return true + } + for i := range nextCalls { + // Bounds-safe: lengths are equal here. + if nextCalls[i].Finished != prevCalls[i].Finished { + return true + } + // A tool call's input only matters once it has landed (Finished + // flips true). Earlier deltas to Input are debounced with the + // rest of the streaming state. + } + if next.ReasoningContent().FinishedAt > 0 && prevReasoningFinishedAt == 0 { + return true + } + return false +} + func (s *service) Get(ctx context.Context, id string) (Message, error) { dbMessage, err := s.q.GetMessage(ctx, id) if err != nil { diff --git a/internal/message/message_test.go b/internal/message/message_test.go new file mode 100644 index 0000000000000000000000000000000000000000..279e0455de445e7eefad36714b1315ac112585c3 --- /dev/null +++ b/internal/message/message_test.go @@ -0,0 +1,695 @@ +package message + +import ( + "context" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/charmbracelet/crush/internal/db" + "github.com/charmbracelet/crush/internal/pubsub" + "github.com/charmbracelet/crush/internal/session" + "github.com/stretchr/testify/require" +) + +// slowUpdateQuerier wraps a [db.Querier] and forces UpdateMessage to +// hang on a release channel. Used to simulate an in-flight SQL write. +type slowUpdateQuerier struct { + db.Querier + release chan struct{} + started chan struct{} + startOnce sync.Once +} + +func (s *slowUpdateQuerier) UpdateMessage(ctx context.Context, arg db.UpdateMessageParams) error { + s.startOnce.Do(func() { close(s.started) }) + select { + case <-s.release: + case <-ctx.Done(): + return ctx.Err() + } + return s.Querier.UpdateMessage(ctx, arg) +} + +// newTestService spins up a fresh in-memory message.Service backed by a +// temporary on-disk SQLite database. Returns the service plus a session +// ID to attach messages to. +func newTestService(t *testing.T, opts ...ServiceOption) (Service, string) { + t.Helper() + conn, err := db.Connect(t.Context(), t.TempDir()) + require.NoError(t, err) + t.Cleanup(func() { _ = conn.Close() }) + + q := db.New(conn) + sessions := session.NewService(q, conn) + sess, err := sessions.Create(t.Context(), "test") + require.NoError(t, err) + + svc := NewService(q, opts...) + return svc, sess.ID +} + +// eventCollector consumes broker events into a slice in a goroutine +// and exposes thread-safe Snapshot / Reset helpers for assertions. +type eventCollector struct { + mu sync.Mutex + events []pubsub.Event[Message] +} + +func collect(ctx context.Context, sub <-chan pubsub.Event[Message]) *eventCollector { + c := &eventCollector{} + go func() { + for { + select { + case <-ctx.Done(): + return + case ev, ok := <-sub: + if !ok { + return + } + c.mu.Lock() + c.events = append(c.events, ev) + c.mu.Unlock() + } + } + }() + return c +} + +func (c *eventCollector) snapshot() []pubsub.Event[Message] { + c.mu.Lock() + defer c.mu.Unlock() + out := make([]pubsub.Event[Message], len(c.events)) + copy(out, c.events) + return out +} + +func (c *eventCollector) reset() { + c.mu.Lock() + defer c.mu.Unlock() + c.events = nil +} + +func TestUpdate_DebouncesTextDeltas(t *testing.T) { + t.Parallel() + + // Long-enough debounce that we can verify nothing flushes prematurely. + svc, sessionID := newTestService(t, WithDebounce(50*time.Millisecond)) + + subCtx, cancelSub := context.WithCancel(t.Context()) + defer cancelSub() + sub := svc.Subscribe(subCtx) + collector := collect(subCtx, sub) + + msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{ + Role: Assistant, + }) + require.NoError(t, err) + // Drop the CreatedEvent emitted by Create. + time.Sleep(5 * time.Millisecond) + collector.reset() + + // Push 5 deltas inside a single debounce window. + for i := 0; i < 5; i++ { + msg.AppendContent("a") + require.NoError(t, svc.Update(t.Context(), msg)) + } + + // Before the debounce expires no UpdatedEvent should have landed. + time.Sleep(10 * time.Millisecond) + require.Empty(t, collector.snapshot(), "no events should land before debounce window expires") + + // Wait for the debounce timer to fire. + require.Eventually(t, func() bool { + return len(collector.snapshot()) >= 1 + }, time.Second, 5*time.Millisecond) + events := collector.snapshot() + require.Len(t, events, 1, "5 deltas should coalesce into 1 UpdatedEvent") + require.Equal(t, pubsub.UpdatedEvent, events[0].Type) + require.Equal(t, "aaaaa", events[0].Payload.Content().Text) + + // Final state must be persisted. + got, err := svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.Equal(t, "aaaaa", got.Content().Text) +} + +func TestUpdate_TerminalUpdatesFlushSynchronously(t *testing.T) { + t.Parallel() + + svc, sessionID := newTestService(t, WithDebounce(time.Hour)) + + subCtx, cancelSub := context.WithCancel(t.Context()) + defer cancelSub() + sub := svc.Subscribe(subCtx) + collector := collect(subCtx, sub) + + msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + time.Sleep(5 * time.Millisecond) + collector.reset() + + // AddFinish makes the message terminal; Update must flush + // synchronously even with a 1-hour debounce. + msg.AppendContent("done") + msg.AddFinish(FinishReasonEndTurn, "", "") + require.NoError(t, svc.Update(t.Context(), msg)) + + require.Eventually(t, func() bool { + return len(collector.snapshot()) >= 1 + }, time.Second, 5*time.Millisecond, + "terminal update must publish without waiting for debounce") + events := collector.snapshot() + require.Len(t, events, 1) + require.True(t, events[0].Payload.IsFinished()) + + got, err := svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.True(t, got.IsFinished()) +} + +func TestUpdate_ToolCallStructuralChangeFlushes(t *testing.T) { + t.Parallel() + + svc, sessionID := newTestService(t, WithDebounce(time.Hour)) + + msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + + // Adding a new tool call is a structural change → sync flush. + msg.AddToolCall(ToolCall{ID: "tc1", Name: "view", Finished: false}) + require.NoError(t, svc.Update(t.Context(), msg)) + + got, err := svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.Len(t, got.ToolCalls(), 1) + require.Equal(t, "tc1", got.ToolCalls()[0].ID) + + // Marking the tool call finished is also a structural change. + msg.AddToolCall(ToolCall{ID: "tc1", Name: "view", Input: "{}", Finished: true}) + require.NoError(t, svc.Update(t.Context(), msg)) + + got, err = svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.True(t, got.ToolCalls()[0].Finished) +} + +func TestUpdate_ReasoningEndFlushes(t *testing.T) { + t.Parallel() + + svc, sessionID := newTestService(t, WithDebounce(time.Hour)) + + msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + + // Reasoning deltas alone debounce. + msg.AppendReasoningContent("hmm") + require.NoError(t, svc.Update(t.Context(), msg)) + got, err := svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.Empty(t, got.ReasoningContent().Thinking, "reasoning delta should still be in the debounce buffer") + + // FinishThinking sets FinishedAt → terminal flush. + msg.FinishThinking() + require.NoError(t, svc.Update(t.Context(), msg)) + + got, err = svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.Equal(t, "hmm", got.ReasoningContent().Thinking) + require.NotZero(t, got.ReasoningContent().FinishedAt) +} + +func TestFlush_DrainsPendingDebouncedUpdates(t *testing.T) { + t.Parallel() + + svc, sessionID := newTestService(t, WithDebounce(time.Hour)) + + msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + msg.AppendContent("buffered") + require.NoError(t, svc.Update(t.Context(), msg)) + + // Without a flush the SQL row is unchanged from Create. + got, err := svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.Empty(t, got.Content().Text) + + require.NoError(t, svc.Flush(t.Context(), msg.ID)) + + got, err = svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.Equal(t, "buffered", got.Content().Text) + + // Subsequent Flush is a no-op. + require.NoError(t, svc.Flush(t.Context(), msg.ID)) +} + +func TestFlushAll_DrainsAllPending(t *testing.T) { + t.Parallel() + + svc, sessionID := newTestService(t, WithDebounce(time.Hour)) + + const n = 5 + msgs := make([]Message, n) + for i := range msgs { + m, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + m.AppendContent("hi") + require.NoError(t, svc.Update(t.Context(), m)) + msgs[i] = m + } + + require.NoError(t, svc.FlushAll(t.Context())) + + for _, m := range msgs { + got, err := svc.Get(t.Context(), m.ID) + require.NoError(t, err) + require.Equal(t, "hi", got.Content().Text, "FlushAll should drain every pending message") + } +} + +func TestUpdate_OrderingMatchesNonCoalesced(t *testing.T) { + t.Parallel() + + // Compare the final state after coalesced vs zero-debounce updates. + // A sequence of interleaved text/reasoning/tool-call updates must + // converge to the same final DB row either way. + build := func(svc Service, sessionID string) Message { + msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + msg.AppendReasoningContent("thinking 1 ") + require.NoError(t, svc.Update(t.Context(), msg)) + msg.AppendReasoningContent("thinking 2") + require.NoError(t, svc.Update(t.Context(), msg)) + msg.FinishThinking() + require.NoError(t, svc.Update(t.Context(), msg)) + msg.AppendContent("hello ") + require.NoError(t, svc.Update(t.Context(), msg)) + msg.AppendContent("world") + require.NoError(t, svc.Update(t.Context(), msg)) + msg.AddToolCall(ToolCall{ID: "tc", Name: "x", Finished: false}) + require.NoError(t, svc.Update(t.Context(), msg)) + msg.AddToolCall(ToolCall{ID: "tc", Name: "x", Input: "{}", Finished: true}) + require.NoError(t, svc.Update(t.Context(), msg)) + msg.AddFinish(FinishReasonEndTurn, "", "") + require.NoError(t, svc.Update(t.Context(), msg)) + return msg + } + + coalesced, sid1 := newTestService(t, WithDebounce(20*time.Millisecond)) + a := build(coalesced, sid1) + require.NoError(t, coalesced.FlushAll(t.Context())) + gotA, err := coalesced.Get(t.Context(), a.ID) + require.NoError(t, err) + + immediate, sid2 := newTestService(t, WithDebounce(0)) + b := build(immediate, sid2) + gotB, err := immediate.Get(t.Context(), b.ID) + require.NoError(t, err) + + require.Equal(t, gotA.Content().Text, gotB.Content().Text) + require.Equal(t, gotA.ReasoningContent().Thinking, gotB.ReasoningContent().Thinking) + require.Equal(t, len(gotA.ToolCalls()), len(gotB.ToolCalls())) + require.Equal(t, gotA.IsFinished(), gotB.IsFinished()) +} + +func TestDelete_DropsPendingState(t *testing.T) { + t.Parallel() + + svc, sessionID := newTestService(t, WithDebounce(time.Hour)) + msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + msg.AppendContent("dropped") + require.NoError(t, svc.Update(t.Context(), msg)) + + require.NoError(t, svc.Delete(t.Context(), msg.ID)) + + // FlushAll after Delete must not write to the deleted row. + require.NoError(t, svc.FlushAll(t.Context())) + + _, err = svc.Get(t.Context(), msg.ID) + require.Error(t, err, "deleted message must remain deleted") +} + +func TestBroker_PublishLossyDropCounter(t *testing.T) { + t.Parallel() + + // Tiny channel buffer so we can saturate from a single sender. + b := pubsub.NewBrokerWithOptions[int](1) + defer b.Shutdown() + + subCtx, cancel := context.WithCancel(t.Context()) + defer cancel() + sub := b.Subscribe(subCtx) + require.NotNil(t, sub) + + // Don't read from sub. Saturate the buffer. + for range 100 { + b.Publish(pubsub.UpdatedEvent, 1) + } + + require.GreaterOrEqual(t, b.DropCount(), uint64(1), + "lossy Publish must increment the drop counter under contention") +} + +func TestBroker_PublishMustDeliverHonorsTimeout(t *testing.T) { + t.Parallel() + + b := pubsub.NewBrokerWithOptions[int](1) + b.SetMustDeliverTimeout(20 * time.Millisecond) + defer b.Shutdown() + + subCtx, cancel := context.WithCancel(t.Context()) + defer cancel() + sub := b.Subscribe(subCtx) + require.NotNil(t, sub) + + // Saturate: one event sits in the buffer, the second must wait. + b.Publish(pubsub.UpdatedEvent, 1) + + // PublishMustDeliver should block up to 20ms then drop. + start := time.Now() + b.PublishMustDeliver(t.Context(), pubsub.UpdatedEvent, 2) + elapsed := time.Since(start) + + require.GreaterOrEqual(t, elapsed, 20*time.Millisecond, + "PublishMustDeliver should block at least the timeout under contention") + require.Less(t, elapsed, 200*time.Millisecond, + "PublishMustDeliver must not block indefinitely") + require.GreaterOrEqual(t, b.MustDeliverDropCount(), uint64(1), + "timeout must increment the must-deliver drop counter") +} + +func TestBroker_PublishMustDeliverWithReader(t *testing.T) { + t.Parallel() + + b := pubsub.NewBrokerWithOptions[int](1) + b.SetMustDeliverTimeout(50 * time.Millisecond) + defer b.Shutdown() + + subCtx, cancel := context.WithCancel(t.Context()) + defer cancel() + sub := b.Subscribe(subCtx) + + var received atomic.Uint64 + done := make(chan struct{}) + go func() { + defer close(done) + for { + select { + case <-subCtx.Done(): + return + case _, ok := <-sub: + if !ok { + return + } + received.Add(1) + } + } + }() + + for i := range 10 { + b.PublishMustDeliver(t.Context(), pubsub.UpdatedEvent, i) + } + + // All 10 should land within the must-deliver timeout window. + require.Eventually(t, func() bool { return received.Load() == 10 }, + time.Second, 5*time.Millisecond, + "all must-deliver events should reach an active subscriber") + require.Zero(t, b.MustDeliverDropCount(), + "no drops expected when subscriber drains promptly") +} + +func TestUpdate_TerminalEventUsesMustDeliver(t *testing.T) { + t.Parallel() + + svc, sessionID := newTestService(t, WithDebounce(time.Hour)) + + subCtx, cancel := context.WithCancel(t.Context()) + defer cancel() + sub := svc.Subscribe(subCtx) + + var seenFinish atomic.Bool + done := make(chan struct{}) + go func() { + defer close(done) + for { + select { + case <-subCtx.Done(): + return + case ev, ok := <-sub: + if !ok { + return + } + if ev.Type == pubsub.UpdatedEvent && ev.Payload.IsFinished() { + seenFinish.Store(true) + } + } + } + }() + + msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + msg.AppendContent("final") + msg.AddFinish(FinishReasonEndTurn, "", "") + require.NoError(t, svc.Update(t.Context(), msg)) + + require.Eventually(t, func() bool { return seenFinish.Load() }, + time.Second, 10*time.Millisecond, + "terminal update must reach subscribers via the must-deliver path") +} + +func TestUpdate_ZeroDebounceFlushesEveryUpdate(t *testing.T) { + t.Parallel() + + svc, sessionID := newTestService(t, WithDebounce(0)) + + msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + + for i := 0; i < 3; i++ { + msg.AppendContent("x") + require.NoError(t, svc.Update(t.Context(), msg)) + got, err := svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.Len(t, got.Content().Text, i+1, "every update must land synchronously when debounce is 0") + } +} + +// TestFlush_WaitsForInFlightWrite reproduces the failure where Flush +// or FlushAll could return before a concurrent in-flight SQL write +// completed. We block UpdateMessage on a release channel, fire the +// debounce timer, then call Flush and assert it does not return until +// the in-flight write actually lands. +func TestFlush_WaitsForInFlightWrite(t *testing.T) { + t.Parallel() + + conn, err := db.Connect(t.Context(), t.TempDir()) + require.NoError(t, err) + t.Cleanup(func() { _ = conn.Close() }) + + q := db.New(conn) + sessions := session.NewService(q, conn) + sess, err := sessions.Create(t.Context(), "test") + require.NoError(t, err) + + slow := &slowUpdateQuerier{ + Querier: q, + release: make(chan struct{}), + started: make(chan struct{}), + } + // Short debounce so the timer fires quickly. + svc := NewService(slow, WithDebounce(10*time.Millisecond)) + + msg, err := svc.Create(t.Context(), sess.ID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + msg.AppendContent("payload") + require.NoError(t, svc.Update(t.Context(), msg)) + + // Wait for the timer-fired flush to enter UpdateMessage. + select { + case <-slow.started: + case <-time.After(time.Second): + t.Fatal("timer-fired flush never reached UpdateMessage") + } + + // At this point the buffer is dirty=false but flushing=true. A + // naive Flush would early-return on !dirty. Spawn Flush in a + // goroutine and assert it has not returned while the write is + // still blocked. + flushDone := make(chan error, 1) + go func() { flushDone <- svc.Flush(t.Context(), msg.ID) }() + + select { + case err := <-flushDone: + t.Fatalf("Flush returned %v while in-flight write was still blocked", err) + case <-time.After(50 * time.Millisecond): + // Expected: Flush is correctly waiting. + } + + // Release the slow write; Flush must now return cleanly. + close(slow.release) + select { + case err := <-flushDone: + require.NoError(t, err) + case <-time.After(time.Second): + t.Fatal("Flush did not return after in-flight write completed") + } + + // The SQL row should now reflect the buffered payload. + got, err := svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.Equal(t, "payload", got.Content().Text) +} + +// TestFlushAll_WaitsForInFlightWrite asserts FlushAll picks up IDs +// whose buffer is currently flushing (dirty=false) so shutdown and +// session-switch callers don't return while an SQL write is mid-flight. +func TestFlushAll_WaitsForInFlightWrite(t *testing.T) { + t.Parallel() + + conn, err := db.Connect(t.Context(), t.TempDir()) + require.NoError(t, err) + t.Cleanup(func() { _ = conn.Close() }) + + q := db.New(conn) + sessions := session.NewService(q, conn) + sess, err := sessions.Create(t.Context(), "test") + require.NoError(t, err) + + slow := &slowUpdateQuerier{ + Querier: q, + release: make(chan struct{}), + started: make(chan struct{}), + } + svc := NewService(slow, WithDebounce(10*time.Millisecond)) + + msg, err := svc.Create(t.Context(), sess.ID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + msg.AppendContent("payload") + require.NoError(t, svc.Update(t.Context(), msg)) + + select { + case <-slow.started: + case <-time.After(time.Second): + t.Fatal("timer-fired flush never reached UpdateMessage") + } + + flushDone := make(chan error, 1) + go func() { flushDone <- svc.FlushAll(t.Context()) }() + + select { + case err := <-flushDone: + t.Fatalf("FlushAll returned %v while in-flight write was still blocked", err) + case <-time.After(50 * time.Millisecond): + } + + close(slow.release) + select { + case err := <-flushDone: + require.NoError(t, err) + case <-time.After(time.Second): + t.Fatal("FlushAll did not return after in-flight write completed") + } + + got, err := svc.Get(t.Context(), msg.ID) + require.NoError(t, err) + require.Equal(t, "payload", got.Content().Text) +} + +// TestUpdate_StructuralFlushUsesMustDeliver covers the second review +// finding: structural terminal events (tool-call add, tool-call +// finish, reasoning end) must publish via the must-deliver path even +// when the message itself is not yet IsFinished. +// +// We detect which path was taken by saturating a subscriber's channel +// buffer with no reader. With a short must-deliver timeout, the +// must-deliver path increments [pubsub.Broker.MustDeliverDropCount] +// after the timeout expires; the lossy path increments +// [pubsub.Broker.DropCount] immediately. The two counters are +// disjoint, so they precisely identify which call site published the +// event. +func TestUpdate_StructuralFlushUsesMustDeliver(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + mut func(*Message) + }{ + { + name: "tool call add", + mut: func(m *Message) { + m.AddToolCall(ToolCall{ID: "tc1", Name: "view"}) + }, + }, + { + name: "tool call finish", + mut: func(m *Message) { + m.AddToolCall(ToolCall{ID: "tc1", Name: "view", Input: "{}", Finished: true}) + }, + }, + { + name: "reasoning end", + mut: func(m *Message) { + m.AppendReasoningContent("hmm") + m.FinishThinking() + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + conn, err := db.Connect(t.Context(), t.TempDir()) + require.NoError(t, err) + t.Cleanup(func() { _ = conn.Close() }) + + q := db.New(conn) + sessions := session.NewService(q, conn) + sess, err := sessions.Create(t.Context(), "test") + require.NoError(t, err) + + // Replace the default broker with a tiny buffer + short + // must-deliver timeout so we can fully saturate from a + // single sender and observe drops without long waits. + svc := NewService(q, WithDebounce(time.Hour)) + impl := svc.(*service) + impl.Shutdown() + impl.Broker = pubsub.NewBrokerWithOptions[Message](1) + impl.SetMustDeliverTimeout(40 * time.Millisecond) + + subCtx, cancel := context.WithCancel(t.Context()) + defer cancel() + sub := svc.Subscribe(subCtx) + + msg, err := svc.Create(t.Context(), sess.ID, CreateMessageParams{Role: Assistant}) + require.NoError(t, err) + + // Saturate the subscriber's buffer (capacity 1). The + // CreatedEvent from Create above already left one event + // in the buffer; we never read sub, so the next publish + // has nowhere to go. + _ = sub // intentionally not drained. + + // Drive the structural change. With debounce=1h, Update + // flushes synchronously and routes through whichever + // publish path the service chose for structural events. + tc.mut(&msg) + require.NoError(t, svc.Update(t.Context(), msg)) + + // Must-deliver timeout (40ms) should have expired with + // no drain. If structural events are routed through + // must-deliver: MustDeliverDropCount > 0, DropCount + // unchanged. If routed through lossy Publish: + // DropCount > 0, MustDeliverDropCount == 0. + require.Eventually(t, func() bool { + return impl.MustDeliverDropCount() >= 1 + }, time.Second, 5*time.Millisecond, + "structural terminal event should publish via must-deliver, not lossy Publish") + require.Zero(t, impl.DropCount(), + "structural terminal event must not be silently dropped via lossy Publish") + }) + } +} diff --git a/internal/pubsub/broker.go b/internal/pubsub/broker.go index bb02bfcce0fbe807890f81ace0d18f0c848bbb29..412ce4de3a870d361207ded257f0c294a74ff4c9 100644 --- a/internal/pubsub/broker.go +++ b/internal/pubsub/broker.go @@ -1,24 +1,59 @@ +// Package pubsub provides a lightweight in-process broker for fan-out +// event delivery between services and the UI. +// +// Delivery semantics: +// +// - [Broker.Publish] is best-effort and lossy under contention. If a +// subscriber's channel is full, the event is dropped for that +// subscriber, a warning is logged, and a counter is incremented. +// This is the right choice for high-frequency intermediate updates +// (e.g. streaming token deltas) where only the latest state +// matters. +// +// - [Broker.PublishMustDeliver] is bounded-blocking. For each +// subscriber it first tries a non-blocking send, then falls back to +// a per-subscriber blocking send with a hard timeout. On timeout the +// event is dropped for that subscriber, an error is logged, and the +// must-deliver drop counter is incremented. The publisher never +// blocks indefinitely. This is the right choice for terminal events +// (finish, tool result, error, cancel) that must not be silently +// coalesced away. +// +// Drop counters ([Broker.DropCount], [Broker.MustDeliverDropCount]) are +// exposed so callers can surface saturation in telemetry. package pubsub import ( "context" "log/slog" "sync" + "sync/atomic" + "time" ) -// bufferSize is the per-subscriber channel capacity for any broker -// created via NewBroker. Publish is non-blocking, so a full buffer -// drops events (with a warning log); sized to cover a long streaming -// assistant turn (~one UpdatedEvent per token) even under TUI render -// stalls. -const bufferSize = 4096 +const ( + // bufferSize is the per-subscriber channel capacity for any broker + // created via NewBroker. Publish is non-blocking, so a full buffer + // drops events (with a warning log); sized to cover a long + // streaming assistant turn (~one UpdatedEvent per token) even under + // TUI render stalls. + bufferSize = 4096 + + // defaultMustDeliverTimeout is the per-subscriber upper bound on how + // long [Broker.PublishMustDeliver] will block waiting for buffer + // space before giving up on that subscriber. + defaultMustDeliverTimeout = 50 * time.Millisecond +) type Broker[T any] struct { - subs map[chan Event[T]]struct{} - mu sync.RWMutex - done chan struct{} - subCount int - channelBufferSize int + subs map[chan Event[T]]struct{} + mu sync.RWMutex + done chan struct{} + subCount int + channelBufferSize int + mustDeliverTimeout time.Duration + dropCount atomic.Uint64 + mustDeliverDropCount atomic.Uint64 } func NewBroker[T any]() *Broker[T] { @@ -27,12 +62,26 @@ func NewBroker[T any]() *Broker[T] { func NewBrokerWithOptions[T any](channelBufferSize int) *Broker[T] { return &Broker[T]{ - subs: make(map[chan Event[T]]struct{}), - done: make(chan struct{}), - channelBufferSize: channelBufferSize, + subs: make(map[chan Event[T]]struct{}), + done: make(chan struct{}), + channelBufferSize: channelBufferSize, + mustDeliverTimeout: defaultMustDeliverTimeout, } } +// SetMustDeliverTimeout overrides the per-subscriber timeout used by +// [Broker.PublishMustDeliver]. A zero or negative value resets to the +// default. Intended primarily for tests. +func (b *Broker[T]) SetMustDeliverTimeout(d time.Duration) { + b.mu.Lock() + defer b.mu.Unlock() + if d <= 0 { + b.mustDeliverTimeout = defaultMustDeliverTimeout + return + } + b.mustDeliverTimeout = d +} + func (b *Broker[T]) Shutdown() { select { case <-b.done: // Already closed @@ -94,6 +143,25 @@ func (b *Broker[T]) GetSubscriberCount() int { return b.subCount } +// DropCount returns the cumulative number of events dropped by +// [Broker.Publish] because a subscriber's channel was full. +func (b *Broker[T]) DropCount() uint64 { + return b.dropCount.Load() +} + +// MustDeliverDropCount returns the cumulative number of events dropped +// by [Broker.PublishMustDeliver] after the per-subscriber timeout +// expired. +func (b *Broker[T]) MustDeliverDropCount() uint64 { + return b.mustDeliverDropCount.Load() +} + +// Publish delivers an event to every active subscriber. +// +// Delivery is non-blocking and lossy: if a subscriber's channel is full +// the event is dropped for that subscriber, a warning is logged, and +// [Broker.DropCount] is incremented. Use [Broker.PublishMustDeliver] +// for events that must not be silently dropped. func (b *Broker[T]) Publish(t EventType, payload T) { b.mu.RLock() defer b.mu.RUnlock() @@ -111,8 +179,58 @@ func (b *Broker[T]) Publish(t EventType, payload T) { case sub <- event: default: // Channel is full, subscriber is slow — skip this event. - // This prevents blocking the publisher. + // Lossy by design; counted and logged so saturation is + // observable. + b.dropCount.Add(1) slog.Warn("Pubsub buffer full; dropping event", "type", t) } } } + +// PublishMustDeliver delivers an event with bounded-blocking semantics. +// For each subscriber it first attempts a non-blocking send, then falls +// back to a blocking send bounded by a per-subscriber timeout (default +// [defaultMustDeliverTimeout]). On timeout the event is dropped for +// that subscriber, [Broker.MustDeliverDropCount] is incremented, and an +// error is logged. The publisher never blocks indefinitely. +// +// Use this for terminal events that must reach subscribers (finish, +// tool result, error, cancel). Callers must still tolerate rare drops +// after timeout — recovery is the subscriber's responsibility (e.g. a +// re-fetch on the next session-visible event). +func (b *Broker[T]) PublishMustDeliver(ctx context.Context, t EventType, payload T) { + b.mu.RLock() + defer b.mu.RUnlock() + + select { + case <-b.done: + return + default: + } + + event := Event[T]{Type: t, Payload: payload} + timeout := b.mustDeliverTimeout + + for sub := range b.subs { + // Fast path: non-blocking send. + select { + case sub <- event: + continue + default: + } + + // Slow path: bounded blocking send. + timer := time.NewTimer(timeout) + select { + case sub <- event: + timer.Stop() + case <-timer.C: + b.mustDeliverDropCount.Add(1) + slog.Error("PublishMustDeliver timed out delivering event", + "type", t, "timeout", timeout) + case <-ctx.Done(): + timer.Stop() + return + } + } +} diff --git a/internal/ui/anim/anim.go b/internal/ui/anim/anim.go index deddbff2f0b88681a05120b3f85debc4b1a5404e..2479e7f93052a731db84d8c7c3257f17891e6366 100644 --- a/internal/ui/anim/anim.go +++ b/internal/ui/anim/anim.go @@ -30,9 +30,14 @@ const ( // change every 8 frames (400 milliseconds). ellipsisAnimSpeed = 8 - // The maximum amount of time that can pass before a character appears. - // This is used to create a staggered entrance effect. - maxBirthOffset = time.Second + // The maximum number of animation steps that can pass before a + // character appears. With fps == 20 this is ~1s of staggered + // entrance, identical to the previous wall-clock-driven value. + // Switching from wall-clock + rand to a step-driven birth schedule + // keeps Render() deterministic: two Anim instances built from the + // same Settings produce byte-identical output when no Animate ticks + // have advanced their step counter. + maxBirthSteps = 20 // Number of frames to prerender for the animation. After this number // of frames, the animation will loop. This only applies when color @@ -107,12 +112,12 @@ type Anim struct { label *csync.Slice[string] labelWidth int labelColor color.Color - startTime time.Time - birthOffsets []time.Duration + birthSteps []int initialFrames [][]string // frames for the initial characters initialized atomic.Bool cyclingFrames [][]string // frames for the cycling characters - step atomic.Int64 // current main frame step + step atomic.Int64 // current main frame step (wraps) + framesSinceStart atomic.Int64 // total Animate ticks (does not wrap) ellipsisStep atomic.Int64 // current ellipsis frame step ellipsisFrames *csync.Slice[string] // ellipsis animation frames id string @@ -140,7 +145,6 @@ func New(opts Settings) *Anim { } else { a.id = fmt.Sprintf("%d", nextID()) } - a.startTime = time.Now() a.cyclingCharWidth = opts.Size a.labelColor = opts.LabelColor @@ -207,7 +211,13 @@ func New(opts Settings) *Anim { } } - // Prerender scrambled rune frames for the animation. + // Prerender scrambled rune frames for the animation. Seed + // the rune picker off the settings hash so cyclingFrames is + // a pure function of Settings: two processes with identical + // Settings populate the cache with the same glyphs, which + // keeps any cross-process golden-file comparison stable. + seed := xxh3.HashString(cacheKey) + rng := rand.New(rand.NewPCG(seed, ^seed)) a.cyclingFrames = make([][]string, numFrames) offset = 0 for i := range a.cyclingFrames { @@ -219,7 +229,7 @@ func New(opts Settings) *Anim { // Also prerender the color with Lip Gloss here to avoid processing // in the render loop. - r := availableRunes[rand.IntN(len(availableRunes))] + r := availableRunes[rng.IntN(len(availableRunes))] a.cyclingFrames[i][j] = lipgloss.NewStyle(). Foreground(ramp[j+offset]). Render(string(r)) @@ -249,10 +259,20 @@ func New(opts Settings) *Anim { animCacheMap.Set(cacheKey, cached) } - // Random assign a birth to each character for a stagged entrance effect. - a.birthOffsets = make([]time.Duration, a.width) - for i := range a.birthOffsets { - a.birthOffsets[i] = time.Duration(rand.N(int64(maxBirthOffset))) * time.Nanosecond + // Assign a deterministic birth step to each column for a + // staggered entrance effect. The schedule is seeded off the + // spinner id and the settings hash, so two spinners with the + // same role and identity stagger identically (this is what + // keeps Render() byte-equal across cache hits and across + // processes for the same Settings+ID) while spinners with + // different ids — distinct assistant messages, different tool + // calls, "Thinking" vs "Generating" labels — fade in with + // different patterns instead of marching in lock-step. + birthSeed := xxh3.HashString(a.id + "|" + cacheKey) + birthRng := rand.New(rand.NewPCG(birthSeed, ^birthSeed)) + a.birthSteps = make([]int, a.width) + for i := range a.birthSteps { + a.birthSteps[i] = birthRng.IntN(maxBirthSteps) } return a @@ -333,13 +353,14 @@ func (a *Anim) Animate(msg StepMsg) tea.Cmd { a.step.Store(0) } + frames := a.framesSinceStart.Add(1) if a.initialized.Load() && a.labelWidth > 0 { // Manage the ellipsis animation. ellipsisStep := a.ellipsisStep.Add(1) if int(ellipsisStep) >= ellipsisAnimSpeed*len(ellipsisFrames) { a.ellipsisStep.Store(0) } - } else if !a.initialized.Load() && time.Since(a.startTime) >= maxBirthOffset { + } else if !a.initialized.Load() && int(frames) >= maxBirthSteps { a.initialized.Store(true) } return a.Step() @@ -349,10 +370,11 @@ func (a *Anim) Animate(msg StepMsg) tea.Cmd { func (a *Anim) Render() string { var b strings.Builder step := int(a.step.Load()) + frames := int(a.framesSinceStart.Load()) for i := range a.width { switch { - case !a.initialized.Load() && i < len(a.birthOffsets) && time.Since(a.startTime) < a.birthOffsets[i]: - // Birth offset not reached: render initial character. + case !a.initialized.Load() && i < len(a.birthSteps) && frames < a.birthSteps[i]: + // Birth step not reached: render initial character. b.WriteString(a.initialFrames[step][i]) case i < a.cyclingCharWidth: // Render a cycling character. diff --git a/internal/ui/chat/agent.go b/internal/ui/chat/agent.go index 882971290d7af78e21add7663f993731c618a967..8342ebaf8345dae6caf16db6b44aad75b97e367c 100644 --- a/internal/ui/chat/agent.go +++ b/internal/ui/chat/agent.go @@ -53,11 +53,21 @@ func NewAgentToolMessageItem( } // Animate progresses the message animation if it should be spinning. +// +// Bumps the parent's F6 list-cache version on both the parent-tick and +// nested-tick branches. Nested tools are not list entries of their +// own — their IDs map to this parent's index in idInxMap +// (internal/ui/model/chat.go:240-246) and their renders are embedded +// inline in this parent's output — so the list only checks the +// parent's version. Without the bump, the list cache would serve the +// previously rendered frame indefinitely and the spinner would appear +// frozen. func (a *AgentToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd { if a.result != nil || a.Status() == ToolStatusCanceled { return nil } if msg.ID == a.ID() { + a.Bump() return a.anim.Animate(msg) } for _, nestedTool := range a.nestedTools { @@ -65,6 +75,7 @@ func (a *AgentToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd { continue } if s, ok := nestedTool.(Animatable); ok { + a.Bump() return s.Animate(msg) } } @@ -77,9 +88,23 @@ func (a *AgentToolMessageItem) NestedTools() []ToolMessageItem { } // SetNestedTools sets the nested tools. +// +// SetNestedTools always bumps the version. The previous design +// deduped when the slice's length and element pointers were +// unchanged, but the live update path in internal/ui/model/ui.go +// mutates existing children in place (SetToolCall / SetResult on the +// same pointers) and then calls SetNestedTools with the same slice. +// Pointer-equality dedupe in that case skips the parent Bump even +// though the parent's rendered output (which embeds the children +// inline) has changed, leaving a stale parent entry in the list +// cache. Always bumping is cheap (one uint64 increment) and called +// at most once per agent event; in the rare case the slice is +// truly unchanged the worst case is one extra parent re-render +// while every child cache hit stays warm. func (a *AgentToolMessageItem) SetNestedTools(tools []ToolMessageItem) { a.nestedTools = tools a.clearCache() + a.Bump() } // AddNestedTool adds a nested tool. @@ -90,6 +115,7 @@ func (a *AgentToolMessageItem) AddNestedTool(tool ToolMessageItem) { } a.nestedTools = append(a.nestedTools, tool) a.clearCache() + a.Bump() } // AgentToolRenderContext renders agent tool messages. @@ -196,15 +222,44 @@ func NewAgenticFetchToolMessageItem( return t } +// Animate progresses the message animation if it should be spinning. +// See [AgentToolMessageItem.Animate] for the parent-bump rationale — +// without an override, the embedded base.Animate would (a) drop +// StepMsgs whose ID matches a nested child instead of the parent +// (anim.Animate's ID check at internal/ui/anim/anim.go:326-329 +// silently returns nil), and (b) never invalidate the parent's +// list-cache entry on a parent tick. +func (a *AgenticFetchToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd { + if a.result != nil || a.Status() == ToolStatusCanceled { + return nil + } + if msg.ID == a.ID() { + a.Bump() + return a.anim.Animate(msg) + } + for _, nestedTool := range a.nestedTools { + if msg.ID != nestedTool.ID() { + continue + } + if s, ok := nestedTool.(Animatable); ok { + a.Bump() + return s.Animate(msg) + } + } + return nil +} + // NestedTools returns the nested tools. func (a *AgenticFetchToolMessageItem) NestedTools() []ToolMessageItem { return a.nestedTools } -// SetNestedTools sets the nested tools. +// SetNestedTools sets the nested tools. Always bumps the version; +// see [AgentToolMessageItem.SetNestedTools] for the rationale. func (a *AgenticFetchToolMessageItem) SetNestedTools(tools []ToolMessageItem) { a.nestedTools = tools a.clearCache() + a.Bump() } // AddNestedTool adds a nested tool. @@ -215,6 +270,7 @@ func (a *AgenticFetchToolMessageItem) AddNestedTool(tool ToolMessageItem) { } a.nestedTools = append(a.nestedTools, tool) a.clearCache() + a.Bump() } // AgenticFetchToolRenderContext renders agentic fetch tool messages. diff --git a/internal/ui/chat/applyhighlight_callback_test.go b/internal/ui/chat/applyhighlight_callback_test.go new file mode 100644 index 0000000000000000000000000000000000000000..f008c0e2c74a91b277670e4e6a9f9ee6c3a0cff4 --- /dev/null +++ b/internal/ui/chat/applyhighlight_callback_test.go @@ -0,0 +1,207 @@ +package chat + +import ( + "testing" + + "github.com/charmbracelet/crush/internal/message" + "github.com/charmbracelet/crush/internal/ui/list" + "github.com/charmbracelet/crush/internal/ui/styles" + "github.com/stretchr/testify/require" +) + +// renderCountingItem wraps a real chat item and counts Render calls +// to expose the list-level cache behaviour to tests. The wrapper +// forwards the list.Item methods exercised by this test — Render, +// Version, Finished — plus the list.Highlightable surface +// (SetHighlight / Highlight) used by the callback-driven scenario. +// Focus is not exercised here, so list.Focusable is not forwarded; +// add SetFocused/IsFocused if a future test needs to drive focus +// through the wrapper. +type renderCountingItem struct { + inner MessageItem + renderHits int + highlightCb func(start [4]int) +} + +func newRenderCountingItem(inner MessageItem) *renderCountingItem { + return &renderCountingItem{inner: inner} +} + +func (r *renderCountingItem) Render(width int) string { + r.renderHits++ + return r.inner.Render(width) +} + +func (r *renderCountingItem) Version() uint64 { + return r.inner.(versionedItem).Version() +} + +func (r *renderCountingItem) Finished() bool { + return r.inner.Finished() +} + +// SetHighlight forwards to the embedded item; the underlying +// highlightableMessageItem dedupes equivalent ranges and bumps the +// shared version on observable change. +func (r *renderCountingItem) SetHighlight(startLine, startCol, endLine, endCol int) { + if h, ok := r.inner.(list.Highlightable); ok { + h.SetHighlight(startLine, startCol, endLine, endCol) + if r.highlightCb != nil { + r.highlightCb([4]int{startLine, startCol, endLine, endCol}) + } + } +} + +func (r *renderCountingItem) Highlight() (int, int, int, int) { + if h, ok := r.inner.(list.Highlightable); ok { + return h.Highlight() + } + return -1, -1, -1, -1 +} + +// TestList_CallbackDrivenHighlightUnfreezeAndReFreeze covers F6 +// §4.5.1 along the live applyHighlightRange path. Instead of +// driving BeginSelectionDrag directly, the test registers a render +// callback that mutates the chat items' highlight ranges (just like +// Chat.applyHighlightRange does in production) and verifies the +// resulting cache behaviour: +// +// - Items inside the active range pick up a SetHighlight call, +// their version bumps, the F6 cache invalidates, and the list +// re-renders them on the next draw. The post-render entry is +// frozen again because the items are Finished() — but their +// stored output now reflects the highlight. +// - Subsequent draws while the range is unchanged are cache hits: +// the callback's SetHighlight call dedupes (same range), the +// version is stable, and the list serves the previous output +// verbatim without calling Render. +// - When the range moves OFF an item, the callback clears the +// highlight, the version bumps, and the item re-renders. After +// that single re-render the entry re-freezes; further draws are +// cache hits. +func TestList_CallbackDrivenHighlightUnfreezeAndReFreeze(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + + // Build three finished assistant messages so all three are + // candidates for freezing. Real items (per Round 2 spec) — the + // surrounding renderCountingItem wrapper just lets the test see + // per-item Render calls. + mk := func(id, body string) *renderCountingItem { + msg := &message.Message{ + ID: id, + Role: message.Assistant, + Parts: []message.ContentPart{ + message.ReasoningContent{ + Thinking: "thinking", + StartedAt: testStartedAt, + FinishedAt: testFinishedAt, + }, + message.TextContent{Text: body}, + message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime}, + }, + } + inner := NewAssistantMessageItem(&sty, msg) + require.True(t, inner.Finished(), "test fixture must be Finished()") + return newRenderCountingItem(inner) + } + + a := mk("a", "alpha") + b := mk("b", "bravo") + c := mk("c", "charlie") + + l := list.NewList(a, b, c) + l.SetSize(80, 30) + + // activeRange holds the inclusive [start, end] item indexes the + // callback should highlight. -1 means no active selection. + activeRange := [2]int{-1, -1} + + cb := func(idx, _ int, item list.Item) list.Item { + hi, ok := item.(list.Highlightable) + if !ok { + return item + } + if activeRange[0] >= 0 && idx >= activeRange[0] && idx <= activeRange[1] { + // Inside the range: highlight the entire item. + hi.SetHighlight(0, 0, -1, -1) + } else { + // Outside the range: clear highlight. + hi.SetHighlight(-1, -1, -1, -1) + } + return item + } + l.RegisterRenderCallback(cb) + + // First render populates the cache. Each item renders exactly + // once even though the callback runs for all three. + _ = l.Render() + require.Equal(t, 1, a.renderHits, "first render: a renders once") + require.Equal(t, 1, b.renderHits, "first render: b renders once") + require.Equal(t, 1, c.renderHits, "first render: c renders once") + + // Subsequent renders without an active range are cache hits. + // The callback's SetHighlight call dedupes (already cleared), + // no version bump, frozen entries served verbatim. + for range 3 { + _ = l.Render() + } + require.Equal(t, 1, a.renderHits, "frozen item must not re-render across stable draws") + require.Equal(t, 1, b.renderHits, "frozen item must not re-render across stable draws") + require.Equal(t, 1, c.renderHits, "frozen item must not re-render across stable draws") + + // Activate a selection range over items a and b. The callback + // will SetHighlight on both during the next render, bumping + // their versions. The cache hit fails (version mismatch) and + // each in-range item re-renders exactly once. + activeRange = [2]int{0, 1} + _ = l.Render() + require.Equal(t, 2, a.renderHits, "in-range item must re-render after SetHighlight") + require.Equal(t, 2, b.renderHits, "in-range item must re-render after SetHighlight") + require.Equal(t, 1, c.renderHits, "out-of-range item stays frozen") + + // Verify the highlight actually landed on the in-range items. + sLine, _, eLine, _ := a.Highlight() + require.Equal(t, 0, sLine) + require.Equal(t, -1, eLine) + sLine, _, eLine, _ = c.Highlight() + require.Equal(t, -1, sLine, "out-of-range item must not be highlighted") + require.Equal(t, -1, eLine) + + // While the range stays the same, subsequent renders are cache + // hits. The callback dedupes (same range), no version bump, + // the post-render entry served verbatim. Note: items are + // re-frozen because they're still Finished() and not in the + // list's freezeSuppressed set. + for range 3 { + _ = l.Render() + } + require.Equal(t, 2, a.renderHits, "in-range item re-freezes after the highlight render") + require.Equal(t, 2, b.renderHits, "in-range item re-freezes after the highlight render") + require.Equal(t, 1, c.renderHits, "out-of-range item stays frozen") + + // Move the range off the items entirely. The callback clears + // each in-range item's highlight back to (-1,-1,-1,-1), which + // bumps their versions and triggers exactly one re-render + // each. After that, the entries re-freeze. + activeRange = [2]int{-1, -1} + _ = l.Render() + require.Equal(t, 3, a.renderHits, "exiting-range item must re-render once when highlight clears") + require.Equal(t, 3, b.renderHits, "exiting-range item must re-render once when highlight clears") + require.Equal(t, 1, c.renderHits, "never-highlighted item stays frozen") + + // Confirm the highlight has been fully cleared. + sLine, _, eLine, _ = a.Highlight() + require.Equal(t, -1, sLine) + require.Equal(t, -1, eLine) + + // And subsequent renders are cache hits again — the items + // re-froze. + for range 3 { + _ = l.Render() + } + require.Equal(t, 3, a.renderHits, "re-frozen item must not re-render across stable draws") + require.Equal(t, 3, b.renderHits, "re-frozen item must not re-render across stable draws") + require.Equal(t, 1, c.renderHits, "never-highlighted item stays frozen") +} diff --git a/internal/ui/chat/assistant.go b/internal/ui/chat/assistant.go index f49cd1d6ceca12b15f705640400da6941972342e..412d85238e53ba33b8b21f7917b7e2298a424d20 100644 --- a/internal/ui/chat/assistant.go +++ b/internal/ui/chat/assistant.go @@ -1,7 +1,9 @@ package chat import ( + "encoding/binary" "fmt" + "hash/fnv" "strings" tea "charm.land/bubbletea/v2" @@ -9,21 +11,116 @@ import ( "github.com/charmbracelet/crush/internal/message" "github.com/charmbracelet/crush/internal/ui/anim" "github.com/charmbracelet/crush/internal/ui/common" + "github.com/charmbracelet/crush/internal/ui/list" "github.com/charmbracelet/crush/internal/ui/styles" "github.com/charmbracelet/x/ansi" ) // assistantMessageTruncateFormat is the text shown when an assistant message is -// truncated. +// truncated in the collapsed state. const assistantMessageTruncateFormat = "… (%d lines hidden) [click or space to expand]" +// assistantMessageTailWindowFormat is shown above a tail-windowed thinking +// block to advertise that earlier lines exist and that the user can +// promote the view to a full expansion. The promotion is wired through +// the existing ToggleExpanded path (click / space) — F5 deliberately +// does not add a new keybinding. +const assistantMessageTailWindowFormat = "… %d earlier lines hidden [click or space for full view]" + // maxCollapsedThinkingHeight defines the maximum height of the thinking const maxCollapsedThinkingHeight = 10 +// maxExpandedThinkingTailLines is the F5 tail-window cap. When the user +// expands a thinking block whose post-glamour line count exceeds this +// threshold, only the last N lines are shown with an affordance line +// indicating how many earlier lines are hidden. Clicking / pressing +// space again promotes the view to a full expansion. The slice is +// taken AFTER glamour render (not before) so fenced code blocks, +// lists, and tables are not torn at arbitrary boundaries. +const maxExpandedThinkingTailLines = 200 + +// thinkingViewMode is the F5 three-state view machine for the thinking +// block. ToggleExpanded cycles +// collapsed → tail-window → full-expanded → collapsed, skipping the +// tail-window step when the rendered thinking fits within the cap so +// short blocks still toggle in two clicks. +type thinkingViewMode uint8 + +const ( + thinkingCollapsed thinkingViewMode = iota + thinkingTailWindow + thinkingFullExpanded +) + +// assistantSection is a per-section render cache for AssistantMessageItem. +// Each section (thinking, content, error) carries its own keys so that +// streaming a section does not invalidate a different — often more +// expensive — section's cached render. srcHash is an FNV-64 of the +// section's source text; extra captures any other state that changes +// the rendered output (e.g. thinkingExpanded, the thinking footer +// inputs). valid disambiguates a real cache hit from the zero value +// when both source text and extras hash to zero. aux carries any +// per-section side data that the caller needs to recover on a hit +// (e.g. the thinking box height for click detection). +type assistantSection struct { + width int + srcHash uint64 + extra uint64 + out string + h int + aux int + valid bool +} + +// hit reports whether the cache entry matches the requested key. +func (s *assistantSection) hit(width int, srcHash, extra uint64) bool { + return s.valid && s.width == width && s.srcHash == srcHash && s.extra == extra +} + +// store records the rendered output under the given key. +func (s *assistantSection) store(width int, srcHash, extra uint64, out string, aux int) { + s.width = width + s.srcHash = srcHash + s.extra = extra + s.out = out + s.h = lipgloss.Height(out) + s.aux = aux + s.valid = true +} + +// reset drops the cached output. +func (s *assistantSection) reset() { + *s = assistantSection{} +} + +// fnv64 hashes a single string with FNV-64. +func fnv64(s string) uint64 { + h := fnv.New64a() + _, _ = h.Write([]byte(s)) + return h.Sum64() +} + +// fnvFields hashes a list of byte fields with length-prefix framing +// so that no concatenation collision can occur between distinct +// field tuples (a NUL inside one field cannot impersonate a +// boundary between two fields). Each field is preceded by its +// length encoded as 8 bytes little-endian. +func fnvFields(fields ...[]byte) uint64 { + h := fnv.New64a() + var lenBuf [8]byte + for _, f := range fields { + binary.LittleEndian.PutUint64(lenBuf[:], uint64(len(f))) + _, _ = h.Write(lenBuf[:]) + _, _ = h.Write(f) + } + return h.Sum64() +} + // AssistantMessageItem represents an assistant message in the chat UI. // // This item includes thinking, and the content but does not include the tool calls. type AssistantMessageItem struct { + *list.Versioned *highlightableMessageItem *cachedMessageItem *focusableMessageItem @@ -31,18 +128,34 @@ type AssistantMessageItem struct { message *message.Message sty *styles.Styles anim *anim.Anim - thinkingExpanded bool + thinkingViewMode thinkingViewMode thinkingBoxHeight int // Tracks the rendered thinking box height for click detection. + + // Per-section render caches. Splitting these out means content + // streaming does not invalidate the (often expensive) thinking + // render, and vice versa. + thinkingSec assistantSection + contentSec assistantSection + errorSec assistantSection + + // streamingContent caches a "stable prefix" glamour render of + // the assistant content body so each streaming flush only + // re-renders the trailing partial. F8 of + // docs/notes/2026-05-12-chat-rendering-perf.md. See + // streaming_markdown.go for the full algorithm. + streamingContent streamingMarkdown } var _ Expandable = (*AssistantMessageItem)(nil) // NewAssistantMessageItem creates a new AssistantMessageItem. func NewAssistantMessageItem(sty *styles.Styles, message *message.Message) MessageItem { + v := list.NewVersioned() a := &AssistantMessageItem{ - highlightableMessageItem: defaultHighlighter(sty), + Versioned: v, + highlightableMessageItem: defaultHighlighter(sty, v), cachedMessageItem: &cachedMessageItem{}, - focusableMessageItem: &focusableMessageItem{}, + focusableMessageItem: newFocusableMessageItem(v), message: message, sty: sty, } @@ -71,6 +184,13 @@ func (a *AssistantMessageItem) Animate(msg anim.StepMsg) tea.Cmd { if !a.isSpinning() { return nil } + // Bump the F6 list-cache version so the next draw re-renders + // this item: a spinner tick mutates anim's internal frame + // counter, which changes the rendered output but is invisible + // to the per-section content hashes. Without the bump the + // list cache would serve the previously rendered frame + // indefinitely and the spinner would appear frozen. + a.Bump() return a.anim.Animate(msg) } @@ -88,14 +208,7 @@ func (a *AssistantMessageItem) RawRender(width int) string { spinner = a.renderSpinning() } - content, height, ok := a.getCachedRender(cappedWidth) - if !ok { - content = a.renderMessageContent(cappedWidth) - height = lipgloss.Height(content) - // cache the rendered content - a.setCachedRender(content, cappedWidth, height) - } - + content, height := a.renderMessageContent(cappedWidth) highlightedContent := a.renderHighlighted(content, cappedWidth, height) if spinner != "" { if highlightedContent != "" { @@ -114,6 +227,23 @@ func (a *AssistantMessageItem) Render(width int) string { // it's wrapping logic. // We already know that the content is wrapped to the correct width in // RawRender, so we can just apply the styles directly to each line. + // + // The split + per-line prefix loop is O(L); cache the result keyed + // by (width, focused, sectionsFingerprint) so steady-state Render + // becomes a pointer return. The sectionsFingerprint folds in the + // per-section srcHash/extra so that any sub-cache change + // invalidates this prefix cache without requiring an explicit + // drop. Bypass the cache while spinning (RawRender's spinner + // suffix changes every animation frame) or while a highlight + // range is active (selection drag). + useCache := !a.isSpinning() && !a.isHighlighted() + cappedWidth := cappedMessageWidth(width) + key := a.prefixCacheKey(cappedWidth) + if useCache { + if cached, ok := a.getCachedPrefixedRender(width, key); ok { + return cached + } + } focused := a.sty.Messages.AssistantFocused.Render() blurred := a.sty.Messages.AssistantBlurred.Render() rendered := a.RawRender(width) @@ -125,45 +255,202 @@ func (a *AssistantMessageItem) Render(width int) string { lines[i] = blurred + line } } - return strings.Join(lines, "\n") + out := strings.Join(lines, "\n") + if useCache { + a.setCachedPrefixedRender(out, width, key) + } + return out +} + +// prefixCacheKey builds the F3 prefixed-render cache key. We pack the +// focus bit into bit 0 and a fingerprint of the section caches into +// the upper bits, so any change to a sub-section's source text or +// extras forces the prefix cache to miss without needing an explicit +// drop. cappedWidth is included so a cached prefix never survives a +// section-cache miss caused by a width change. The finish reason is +// folded in too because it controls the composition of +// renderMessageContent (e.g. appending the constant "Canceled" +// string) — that decision lives outside any section's own hash. +func (a *AssistantMessageItem) prefixCacheKey(cappedWidth int) uint64 { + thinkSrc, thinkExtra := a.thinkingKey() + contentSrc, contentExtra := a.contentKey() + errSrc, errExtra := a.errorKey() + h := fnv.New64a() + var buf [8]byte + writeU64 := func(v uint64) { + for i := range 8 { + buf[i] = byte(v >> (8 * i)) + } + _, _ = h.Write(buf[:]) + } + writeU64(uint64(cappedWidth)) + writeU64(thinkSrc) + writeU64(thinkExtra) + writeU64(contentSrc) + writeU64(contentExtra) + writeU64(errSrc) + writeU64(errExtra) + writeU64(a.compositionKey()) + fingerprint := h.Sum64() + var focusBit uint64 + if a.focused { + focusBit = 1 + } + return (fingerprint &^ 1) | focusBit } -// renderMessageContent renders the message content including thinking, main content, and finish reason. -func (a *AssistantMessageItem) renderMessageContent(width int) string { +// compositionKey hashes the inputs to renderMessageContent's structural +// decisions (which sections to include, whether to append the +// constant "Canceled" footer) so that flipping IsFinished or the +// finish reason invalidates the prefix cache even when no section's +// own source text changed. +func (a *AssistantMessageItem) compositionKey() uint64 { + var finishedFlag byte + var reason string + if a.message.IsFinished() { + finishedFlag = 1 + reason = string(a.message.FinishReason()) + } + // Length-prefixed framing keeps the finished flag and the reason + // string from blending into one another. + return fnvFields([]byte{finishedFlag}, []byte(reason)) +} + +// renderMessageContent renders the message content including thinking, main +// content, and finish reason. Each section is served from its own cache; +// only the section whose source text or extras changed since the last +// render is recomputed. +func (a *AssistantMessageItem) renderMessageContent(width int) (string, int) { var messageParts []string thinking := strings.TrimSpace(a.message.ReasoningContent().Thinking) content := strings.TrimSpace(a.message.Content().Text) - // if the massage has reasoning content add that first + if thinking != "" { - messageParts = append(messageParts, a.renderThinking(a.message.ReasoningContent().Thinking, width)) + messageParts = append(messageParts, a.cachedThinking(width)) } - // then add the main content if content != "" { - // add a spacer between thinking and content if thinking != "" { messageParts = append(messageParts, "") } - messageParts = append(messageParts, a.renderMarkdown(content, width)) + messageParts = append(messageParts, a.cachedContent(width)) } - // finally add any finish reason info if a.message.IsFinished() { switch a.message.FinishReason() { case message.FinishReasonCanceled: messageParts = append(messageParts, a.sty.Messages.AssistantCanceled.Render("Canceled")) case message.FinishReasonError: - messageParts = append(messageParts, a.renderError(width)) + messageParts = append(messageParts, a.cachedError(width)) } } - return strings.Join(messageParts, "\n") + out := strings.Join(messageParts, "\n") + return out, lipgloss.Height(out) +} + +// thinkingKey returns the (srcHash, extra) cache key components for the +// thinking section. extra folds in everything other than the raw +// thinking text that affects the rendered output: the view mode +// (collapsed / tail-window / full) and the footer state (which +// depends on IsThinking, ToolCalls, and ThinkingDuration). +func (a *AssistantMessageItem) thinkingKey() (uint64, uint64) { + thinking := a.message.ReasoningContent().Thinking + srcHash := fnv64(thinking) + + showFooter := !a.message.IsThinking() || len(a.message.ToolCalls()) > 0 + var durationStr string + if showFooter { + duration := a.message.ThinkingDuration() + if duration.String() != "0s" { + durationStr = duration.String() + } + } + var footer byte + if showFooter { + footer = 1 + } + // Length-prefixed framing avoids any delimiter collision between + // the flag bytes and the duration string. The view mode is folded + // in so that toggling collapsed ↔ tail-window ↔ full invalidates + // only the thinking section, not content/error. + extra := fnvFields([]byte{byte(a.thinkingViewMode), footer}, []byte(durationStr)) + return srcHash, extra +} + +// contentKey returns the (srcHash, extra) cache key components for the +// main content section. +func (a *AssistantMessageItem) contentKey() (uint64, uint64) { + return fnv64(a.message.Content().Text), 0 +} + +// errorKey returns the (srcHash, extra) cache key components for the +// error section. Returns (0, 0) when no error is present so the cache +// stays a no-op for non-error messages. +func (a *AssistantMessageItem) errorKey() (uint64, uint64) { + if !a.message.IsFinished() || a.message.FinishReason() != message.FinishReasonError { + return 0, 0 + } + finishPart := a.message.FinishPart() + if finishPart == nil { + return 0, 0 + } + // Length-prefixed framing prevents Message+Details collisions + // between distinct (Message, Details) tuples that would + // otherwise concatenate to the same byte sequence. + return fnvFields([]byte(finishPart.Message), []byte(finishPart.Details)), 0 +} + +// cachedThinking returns the rendered thinking section, computing and +// caching it on miss. The thinking-box height (used for click target +// detection) is preserved across hits via assistantSection.aux so the +// cached path never desyncs click detection. +func (a *AssistantMessageItem) cachedThinking(width int) string { + srcHash, extra := a.thinkingKey() + if a.thinkingSec.hit(width, srcHash, extra) { + a.thinkingBoxHeight = a.thinkingSec.aux + return a.thinkingSec.out + } + out := a.renderThinking(a.message.ReasoningContent().Thinking, width) + a.thinkingSec.store(width, srcHash, extra, out, a.thinkingBoxHeight) + return out +} + +// cachedContent returns the rendered content section. +func (a *AssistantMessageItem) cachedContent(width int) string { + srcHash, extra := a.contentKey() + if a.contentSec.hit(width, srcHash, extra) { + return a.contentSec.out + } + out := a.renderMarkdown(a.message.Content().Text, width) + a.contentSec.store(width, srcHash, extra, out, 0) + return out +} + +// cachedError returns the rendered error section. +func (a *AssistantMessageItem) cachedError(width int) string { + srcHash, extra := a.errorKey() + if a.errorSec.hit(width, srcHash, extra) { + return a.errorSec.out + } + out := a.renderError(width) + a.errorSec.store(width, srcHash, extra, out, 0) + return out } // renderThinking renders the thinking/reasoning content with footer. +// +// Slicing happens AFTER glamour rendering so fenced code blocks, list +// continuations, and tables are not split mid-block — the same +// boundary problem §4.4 of the design note flags. The bordered +// ThinkingBox style is applied on top of the (already-windowed) +// lines so the visual box matches what the user sees today. func (a *AssistantMessageItem) renderThinking(thinking string, width int) string { renderer := common.QuietMarkdownRenderer(a.sty, width) + mu := common.LockMarkdownRenderer(renderer) + mu.Lock() rendered, err := renderer.Render(thinking) + mu.Unlock() if err != nil { rendered = thinking } @@ -172,13 +459,23 @@ func (a *AssistantMessageItem) renderThinking(thinking string, width int) string lines := strings.Split(rendered, "\n") totalLines := len(lines) - isTruncated := totalLines > maxCollapsedThinkingHeight - if !a.thinkingExpanded && isTruncated { - lines = lines[totalLines-maxCollapsedThinkingHeight:] - hint := a.sty.Messages.ThinkingTruncationHint.Render( - fmt.Sprintf(assistantMessageTruncateFormat, totalLines-maxCollapsedThinkingHeight), - ) - lines = append([]string{hint, ""}, lines...) + switch a.thinkingViewMode { + case thinkingCollapsed: + if totalLines > maxCollapsedThinkingHeight { + lines = lines[totalLines-maxCollapsedThinkingHeight:] + hint := a.sty.Messages.ThinkingTruncationHint.Render( + fmt.Sprintf(assistantMessageTruncateFormat, totalLines-maxCollapsedThinkingHeight), + ) + lines = append([]string{hint, ""}, lines...) + } + case thinkingTailWindow: + if totalLines > maxExpandedThinkingTailLines { + lines = lines[totalLines-maxExpandedThinkingTailLines:] + hint := a.sty.Messages.ThinkingTruncationHint.Render( + fmt.Sprintf(assistantMessageTailWindowFormat, totalLines-maxExpandedThinkingTailLines), + ) + lines = append([]string{hint, ""}, lines...) + } } thinkingStyle := a.sty.Messages.ThinkingBox.Width(width) @@ -202,14 +499,18 @@ func (a *AssistantMessageItem) renderThinking(thinking string, width int) string return result } -// renderMarkdown renders content as markdown. +// renderMarkdown renders content as markdown. F8 routes the call +// through streamingContent, which caches the glamour render of a +// "stable prefix" so each streaming flush only re-renders the +// trailing partial. The streaming cache invalidates itself on +// width change and on any content that is not a prefix-extension +// of the previously rendered content (e.g. user retried the +// turn), and falls back to a full render whenever boundary +// detection has the slightest doubt — see +// findSafeMarkdownBoundary. func (a *AssistantMessageItem) renderMarkdown(content string, width int) string { renderer := common.MarkdownRenderer(a.sty, width) - result, err := renderer.Render(content) - if err != nil { - return content - } - return strings.TrimSuffix(result, "\n") + return a.streamingContent.Render(content, width, renderer) } func (a *AssistantMessageItem) renderSpinning() string { @@ -240,23 +541,107 @@ func (a *AssistantMessageItem) isSpinning() bool { return (isThinking || !isFinished) && !hasContent && !hasToolCalls } -// SetMessage is used to update the underlying message. -func (a *AssistantMessageItem) SetMessage(message *message.Message) tea.Cmd { +// SetMessage is used to update the underlying message. Only the +// sub-section caches whose source text or extras changed are +// invalidated; the others survive and serve cache hits on the next +// RawRender. +func (a *AssistantMessageItem) SetMessage(msg *message.Message) tea.Cmd { wasSpinning := a.isSpinning() - a.message = message - a.clearCache() + a.message = msg + // Bump the F6 version even if the underlying *message.Message + // pointer is identical: callers may have mutated the message in + // place (delta append) and we cannot tell from here. The + // per-section caches dedupe identical content via FNV-64 hashes, + // so a redundant bump only costs one list-cache repopulation. + a.Bump() + // The prefix cache is keyed by a fingerprint that includes every + // section's source hash, so an unchanged section keeps its prefix + // cache valid while a changed section forces a miss naturally. + // Section caches themselves are content-keyed, so they do not + // need an explicit drop here either. if !wasSpinning && a.isSpinning() { return a.StartAnimation() } return nil } -// ToggleExpanded toggles the expanded state of the thinking box and returns -// whether the item is now expanded. +// Finished implements list.Item. The assistant message is freezable +// once the message reports IsFinished() and is no longer spinning +// (no animation tick remains pending). Streaming tail animation is +// caught by isSpinning, so freezing only kicks in once the turn is +// fully terminal. The list cache invalidates the entry on the next +// version bump if anything (focus, highlight, expansion) changes. +func (a *AssistantMessageItem) Finished() bool { + return a.message.IsFinished() && !a.isSpinning() +} + +// clearCache drops every cached render for this item, including the +// per-section caches. Shadows the embedded cachedMessageItem.clearCache +// so ClearItemCaches (style change) wipes the section caches too. +// F8: also drop the streaming-markdown stable-prefix cache because +// the cached glamour render embeds the OLD style's ANSI sequences +// and is no longer visually consistent with the new style. +func (a *AssistantMessageItem) clearCache() { + a.cachedMessageItem.clearCache() + a.thinkingSec.reset() + a.contentSec.reset() + a.errorSec.reset() + a.streamingContent.Reset() +} + +// ToggleExpanded advances the F5 thinking view-mode cycle and returns +// whether the item is now in any expanded state (tail-window or full). +// The cycle is collapsed → tail-window → full → collapsed, with the +// tail-window step skipped when the rendered thinking fits within +// maxExpandedThinkingTailLines so short blocks remain a two-click +// toggle. Both the thinking section cache and the F3 prefix cache +// fold thinkingViewMode into their keys, so no explicit invalidation +// is required here. +// +// When the message carries no thinking text the toggle is a no-op: +// there is nothing to expand, and mutating the view mode would +// thrash the thinking-section cache key for no visible benefit. func (a *AssistantMessageItem) ToggleExpanded() bool { - a.thinkingExpanded = !a.thinkingExpanded - a.clearCache() - return a.thinkingExpanded + if strings.TrimSpace(a.message.ReasoningContent().Thinking) == "" { + return a.thinkingViewMode != thinkingCollapsed + } + switch a.thinkingViewMode { + case thinkingCollapsed: + if a.tailWindowWouldTruncate() { + a.thinkingViewMode = thinkingTailWindow + } else { + a.thinkingViewMode = thinkingFullExpanded + } + case thinkingTailWindow: + a.thinkingViewMode = thinkingFullExpanded + case thinkingFullExpanded: + a.thinkingViewMode = thinkingCollapsed + } + a.Bump() + return a.thinkingViewMode != thinkingCollapsed +} + +// tailWindowWouldTruncate reports whether the current thinking text +// is long enough that the tail-window step is worth inserting into +// the toggle cycle. We use a cheap source-text logical-line count +// as the heuristic rather than peeking into the cache: the cache +// may be populated in collapsed state (where its height is bounded +// by maxCollapsedThinkingHeight and tells us nothing about the +// underlying length), and re-running glamour just to count lines +// would defeat the cache. The heuristic can over-trigger (a source +// with many short lines may wrap to fewer than N lines), in which +// case the tail-window render is visually identical to full and +// the cycle costs the user one extra toggle — preferred over the +// alternative of failing to show the affordance on a genuinely +// long block. +// +// Logical line count is `1 + newlineCount` (a string with no +// newlines is one line). Comparing newline count alone introduced +// an off-by-one that let a source whose post-newline-split length +// equalled the cap skip the tail-window step. +func (a *AssistantMessageItem) tailWindowWouldTruncate() bool { + lineCount := 1 + strings.Count(a.message.ReasoningContent().Thinking, "\n") + return lineCount > maxExpandedThinkingTailLines } // HandleMouseClick implements MouseClickable. It signals (via a true return) diff --git a/internal/ui/chat/assistant_section_cache_test.go b/internal/ui/chat/assistant_section_cache_test.go new file mode 100644 index 0000000000000000000000000000000000000000..f07f4146bdd06f249f52759f58e0b02649d20de6 --- /dev/null +++ b/internal/ui/chat/assistant_section_cache_test.go @@ -0,0 +1,464 @@ +package chat + +import ( + "strings" + "testing" + + "github.com/charmbracelet/crush/internal/message" + "github.com/charmbracelet/crush/internal/ui/styles" + "github.com/stretchr/testify/require" +) + +// Fixed Unix timestamps for deterministic cache-equality tests. The +// thinking section's `extra` hash folds in ThinkingDuration, which +// in turn depends on (FinishedAt - StartedAt). Anchoring both +// timestamps removes any wall-clock dependency from the cache key +// so two builds across a second boundary still hit the cache. +const ( + testStartedAt int64 = 1_700_000_000 + testFinishedAt int64 = 1_700_000_005 + testFinishTime int64 = 1_700_000_006 +) + +// thinkingMessage builds an assistant message with a fixed reasoning +// content and an optional text content. When text is empty the +// message represents a still-thinking turn (matches IsThinking()). +// Both reasoning timestamps are anchored to fixed Unix seconds so +// ThinkingDuration is deterministic and cache-equality assertions +// don't depend on wall-clock time. +func thinkingMessage(id, thinking, text string) *message.Message { + parts := []message.ContentPart{ + message.ReasoningContent{ + Thinking: thinking, + StartedAt: testStartedAt, + FinishedAt: testFinishedAt, + }, + } + if text != "" { + parts = append(parts, message.TextContent{Text: text}) + } + return &message.Message{ + ID: id, + Role: message.Assistant, + Parts: parts, + } +} + +// errorMessage builds a finished assistant message whose finish part +// carries an error reason plus a custom message and details. +func errorMessage(id, errMsg, errDetails string) *message.Message { + return &message.Message{ + ID: id, + Role: message.Assistant, + Parts: []message.ContentPart{ + message.TextContent{Text: "partial output"}, + message.Finish{ + Reason: message.FinishReasonError, + Message: errMsg, + Details: errDetails, + Time: testFinishTime, + }, + }, + } +} + +// renderTwoSetMessages drives a SetMessage cycle and returns the +// section-cache identity (out string pointers via direct comparison +// of the cached fields). The test compares `out` strings; identical +// output across cycles is the cache-hit indicator we rely on. +type sectionSnapshot struct { + thinking string + content string + errSec string +} + +func snapshot(a *AssistantMessageItem) sectionSnapshot { + return sectionSnapshot{ + thinking: a.thinkingSec.out, + content: a.contentSec.out, + errSec: a.errorSec.out, + } +} + +// TestAssistantSectionCache_ContentChangeDoesNotInvalidateThinking covers +// the central F4 invariant: streaming the main content through SetMessage +// must keep the cached thinking render intact, provided the inputs to +// the thinking section render (text, expanded flag, footer state) are +// unchanged. We seed an already-non-empty content so that IsThinking() +// is false on both renders — that's the steady streaming state where +// the thinking block has finished and content keeps growing. +func TestAssistantSectionCache_ContentChangeDoesNotInvalidateThinking(t *testing.T) { + sty := styles.CharmtonePantera() + thinking := "Step 1\nStep 2\nStep 3" + msg := thinkingMessage("a1", thinking, "Initial answer.") + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + + const width = 71 + + _ = item.RawRender(width) + first := snapshot(item) + require.NotEmpty(t, first.thinking, "thinking section must be populated after first render") + + // Stream more content into the existing turn. Thinking text and + // footer state are byte-identical between the two renders. + updated := thinkingMessage("a1", thinking, "Initial answer. More streamed text.") + item.SetMessage(updated) + _ = item.RawRender(width) + second := snapshot(item) + + require.Equal(t, first.thinking, second.thinking, + "content streaming must not invalidate the thinking section render") + require.NotEqual(t, first.content, second.content, + "content section must have been re-rendered") +} + +// TestAssistantSectionCache_ThinkingChangeDoesNotInvalidateContent is the +// mirror of the previous test: extending thinking text must not force a +// re-render of the content section. +func TestAssistantSectionCache_ThinkingChangeDoesNotInvalidateContent(t *testing.T) { + sty := styles.CharmtonePantera() + content := "Final answer goes here." + msg := thinkingMessage("a2", "Step 1", content) + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + + const width = 73 + + _ = item.RawRender(width) + first := snapshot(item) + require.NotEmpty(t, first.content) + + updated := thinkingMessage("a2", "Step 1\nStep 2", content) + item.SetMessage(updated) + _ = item.RawRender(width) + second := snapshot(item) + + require.Equal(t, first.content, second.content, + "thinking streaming must not invalidate the content section render") + require.NotEqual(t, first.thinking, second.thinking, + "thinking text changed; thinking section must have re-rendered") +} + +// TestAssistantSectionCache_HashKeyDiscrimination asserts that two +// messages with different source text hash to different per-section +// keys, and that messages with identical source text hit the cache. +func TestAssistantSectionCache_HashKeyDiscrimination(t *testing.T) { + sty := styles.CharmtonePantera() + msgA := thinkingMessage("a3", "thinking A", "content A") + msgB := thinkingMessage("a3", "thinking B", "content B") + + itemA := NewAssistantMessageItem(&sty, msgA).(*AssistantMessageItem) + itemB := NewAssistantMessageItem(&sty, msgB).(*AssistantMessageItem) + + thinkSrcA, _ := itemA.thinkingKey() + thinkSrcB, _ := itemB.thinkingKey() + require.NotEqual(t, thinkSrcA, thinkSrcB, + "distinct thinking text must produce distinct FNV-64 source hashes") + + contentSrcA, _ := itemA.contentKey() + contentSrcB, _ := itemB.contentKey() + require.NotEqual(t, contentSrcA, contentSrcB, + "distinct content text must produce distinct FNV-64 source hashes") + + // Identical source text on a fresh item must produce the same + // hashes — keying invariant for cache hits. + itemAClone := NewAssistantMessageItem(&sty, thinkingMessage("a3", "thinking A", "content A")).(*AssistantMessageItem) + thinkSrcAClone, _ := itemAClone.thinkingKey() + contentSrcAClone, _ := itemAClone.contentKey() + require.Equal(t, thinkSrcA, thinkSrcAClone) + require.Equal(t, contentSrcA, contentSrcAClone) +} + +// TestAssistantSectionCache_CloneRoundTrip guards the contract that +// message.Clone() does not invalidate any section cache: re-keying off +// the cloned message must produce identical hashes and the section +// caches must serve byte-identical renders. +func TestAssistantSectionCache_CloneRoundTrip(t *testing.T) { + sty := styles.CharmtonePantera() + orig := thinkingMessage("a4", "Reasoning step.", "Answer text.") + item := NewAssistantMessageItem(&sty, orig).(*AssistantMessageItem) + + const width = 75 + _ = item.RawRender(width) + first := snapshot(item) + + cloned := orig.Clone() + item.SetMessage(&cloned) + _ = item.RawRender(width) + second := snapshot(item) + + require.Equal(t, first.thinking, second.thinking, "clone must hit the thinking cache") + require.Equal(t, first.content, second.content, "clone must hit the content cache") +} + +// TestAssistantSectionCache_ResizeInvalidatesAll asserts that a width +// change forces a re-render of every section. +func TestAssistantSectionCache_ResizeInvalidatesAll(t *testing.T) { + sty := styles.CharmtonePantera() + msg := errorMessage("a5", "boom", strings.Repeat("detail line\n", 5)) + // errorMessage returns FinishReasonError; combine with thinking + // content so all three sections are exercised. + msg.Parts = append([]message.ContentPart{ + message.ReasoningContent{ + Thinking: "Considering options.", + StartedAt: testStartedAt, + FinishedAt: testFinishedAt, + }, + }, msg.Parts...) + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + + _ = item.RawRender(77) + first := snapshot(item) + require.NotEmpty(t, first.thinking) + require.NotEmpty(t, first.content) + require.NotEmpty(t, first.errSec) + + _ = item.RawRender(117) + second := snapshot(item) + + require.NotEqual(t, first.thinking, second.thinking, "resize must re-render the thinking section") + require.NotEqual(t, first.content, second.content, "resize must re-render the content section") + require.NotEqual(t, first.errSec, second.errSec, "resize must re-render the error section") +} + +// TestAssistantSectionCache_ErrorIndependentOfThinkingAndContent guards +// that the error section caches independently. Editing the error +// message must not invalidate the other two sections, and editing the +// content must not invalidate the error section. +func TestAssistantSectionCache_ErrorIndependentOfThinkingAndContent(t *testing.T) { + sty := styles.CharmtonePantera() + build := func(thinking, content, errMsg, errDetails string) *message.Message { + return &message.Message{ + ID: "a6", + Role: message.Assistant, + Parts: []message.ContentPart{ + message.ReasoningContent{ + Thinking: thinking, + StartedAt: testStartedAt, + FinishedAt: testFinishedAt, + }, + message.TextContent{Text: content}, + message.Finish{ + Reason: message.FinishReasonError, + Message: errMsg, + Details: errDetails, + Time: testFinishTime, + }, + }, + } + } + + item := NewAssistantMessageItem(&sty, build("think", "content", "boom", "details")).(*AssistantMessageItem) + _ = item.RawRender(79) + first := snapshot(item) + + // Change only the error text. Thinking and content caches must + // survive; error cache must miss and re-render. + item.SetMessage(build("think", "content", "different boom", "different details")) + _ = item.RawRender(79) + second := snapshot(item) + + require.Equal(t, first.thinking, second.thinking, "error change must not invalidate thinking") + require.Equal(t, first.content, second.content, "error change must not invalidate content") + require.NotEqual(t, first.errSec, second.errSec, "error change must re-render the error section") + + // Now change only the content; error cache must survive. + item.SetMessage(build("think", "different content", "different boom", "different details")) + _ = item.RawRender(79) + third := snapshot(item) + + require.Equal(t, second.thinking, third.thinking) + require.NotEqual(t, second.content, third.content) + require.Equal(t, second.errSec, third.errSec, "content change must not invalidate the error section") +} + +// TestAssistantSectionCache_PrefixCacheRespectsSectionChanges guards +// the F3/F4 boundary: the prefix cache must invalidate when any +// underlying section changes. We verify by comparing the F3-cached +// Render output across SetMessage cycles. +func TestAssistantSectionCache_PrefixCacheRespectsSectionChanges(t *testing.T) { + sty := styles.CharmtonePantera() + build := func(content string) *message.Message { + return &message.Message{ + ID: "a7", + Role: message.Assistant, + Parts: []message.ContentPart{ + message.TextContent{Text: content}, + message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime}, + }, + } + } + + item := NewAssistantMessageItem(&sty, build("first content")).(*AssistantMessageItem) + item.SetFocused(true) + + const width = 81 + first := item.Render(width) + + item.SetMessage(build("second content")) + second := item.Render(width) + require.NotEqual(t, first, second, + "prefix cache must invalidate when the content section changes") + + // Re-set to the original content; the prefix cache should + // produce identical output again. + item.SetMessage(build("first content")) + third := item.Render(width) + require.Equal(t, first, third) +} + +// TestAssistantSectionCache_ByteIdenticalToFreshRender asserts that the +// F4 cached path produces the same bytes as a fresh-instance render of +// the equivalent message — i.e. caching is invisible from the outside. +// Drives a sequence of mutations (thinking change, content change, +// finish) and compares every step against an independent item rendered +// from scratch. +func TestAssistantSectionCache_ByteIdenticalToFreshRender(t *testing.T) { + sty := styles.CharmtonePantera() + const width = 83 + + type step struct { + name string + msg *message.Message + } + startedAt := testStartedAt + finishedAt := testFinishedAt + finishTime := testFinishTime + steps := []step{ + { + name: "thinking-only", + msg: &message.Message{ + ID: "iso", Role: message.Assistant, + Parts: []message.ContentPart{ + message.ReasoningContent{Thinking: "first reasoning", StartedAt: startedAt}, + }, + }, + }, + { + name: "thinking-grew", + msg: &message.Message{ + ID: "iso", Role: message.Assistant, + Parts: []message.ContentPart{ + message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt}, + }, + }, + }, + { + name: "content-arrived", + msg: &message.Message{ + ID: "iso", Role: message.Assistant, + Parts: []message.ContentPart{ + message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt, FinishedAt: finishedAt}, + message.TextContent{Text: "the answer"}, + }, + }, + }, + { + name: "finished-end-turn", + msg: &message.Message{ + ID: "iso", Role: message.Assistant, + Parts: []message.ContentPart{ + message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt, FinishedAt: finishedAt}, + message.TextContent{Text: "the answer"}, + message.Finish{Reason: message.FinishReasonEndTurn, Time: finishTime}, + }, + }, + }, + } + + first := steps[0].msg.Clone() + cached := NewAssistantMessageItem(&sty, &first).(*AssistantMessageItem) + for _, s := range steps { + cached.SetMessage(s.msg) + freshMsg := s.msg.Clone() + fresh := NewAssistantMessageItem(&sty, &freshMsg).(*AssistantMessageItem) + require.Equal(t, fresh.RawRender(width), cached.RawRender(width), + "step %q: cached path must match fresh render byte-for-byte", s.name) + } +} + +// TestAssistantSectionCache_PrefixCacheInvalidatesOnCompositionOnlyChange +// guards the F3 prefix cache against composition-only changes: +// flipping the finish reason from EndTurn to Canceled appends a +// constant "Canceled" line via renderMessageContent, but no +// section's own source text changes. The prefix cache must observe +// the difference (compositionKey is folded into prefixCacheKey) and +// the resulting bytes must differ. As a second guarantee, a fresh +// item built with the same final state must produce byte-equal +// output to the cached item — caching must never produce stale or +// divergent renders. +func TestAssistantSectionCache_PrefixCacheInvalidatesOnCompositionOnlyChange(t *testing.T) { + sty := styles.CharmtonePantera() + const width = 87 + + build := func(reason message.FinishReason) *message.Message { + return &message.Message{ + ID: "comp", + Role: message.Assistant, + Parts: []message.ContentPart{ + message.TextContent{Text: "hi"}, + message.Finish{Reason: reason, Time: testFinishTime}, + }, + } + } + + item := NewAssistantMessageItem(&sty, build(message.FinishReasonEndTurn)).(*AssistantMessageItem) + endTurnOut := item.Render(width) + + // Flip only the finish reason. Thinking is empty and content + // text is unchanged, so no section's source hash moves; only + // compositionKey shifts. The prefix cache must miss. + item.SetMessage(build(message.FinishReasonCanceled)) + canceledOut := item.Render(width) + require.NotEqual(t, endTurnOut, canceledOut, + "prefix cache must invalidate on composition-only change (finish reason)") + + // A fresh item built with the same final state must match the + // cached item byte-for-byte — caching is invisible from the + // outside and never serves stale output. + fresh := NewAssistantMessageItem(&sty, build(message.FinishReasonCanceled)).(*AssistantMessageItem) + require.Equal(t, fresh.Render(width), canceledOut, + "cached output must equal a fresh render of the same final state") +} + +// TestAssistantSectionCache_ThinkingBoxHeightSurvivesCacheHit guards +// click-detection geometry across thinking-section cache hits. The +// thinking box height feeds HandleMouseClick; it is recomputed +// inside renderThinking and must be restored from +// assistantSection.aux when the thinking cache hits. We render once +// to capture the original height, trigger a content-only change so +// thinkingKey stays identical (thinking text, expanded flag, and +// footer state all unchanged), render again, and assert the +// thinkingBoxHeight field is preserved. +func TestAssistantSectionCache_ThinkingBoxHeightSurvivesCacheHit(t *testing.T) { + sty := styles.CharmtonePantera() + const width = 71 + + thinking := strings.Join([]string{ + "Considering the request.", + "Looking at the relevant files.", + "Drafting a plan.", + "Verifying constraints.", + }, "\n") + msg := thinkingMessage("hbox", thinking, "initial answer") + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + item.thinkingViewMode = thinkingFullExpanded + + _ = item.RawRender(width) + originalHeight := item.thinkingBoxHeight + require.Greater(t, originalHeight, 0, + "thinking box height must be populated after first render") + + // Stomp the field so a stale read (cache hit that fails to + // restore aux) is detectable. Then trigger a content-only + // change: thinkingKey is byte-identical between renders, so + // the thinking section cache must hit and restore the + // preserved height via assistantSection.aux. + item.thinkingBoxHeight = -1 + updated := thinkingMessage("hbox", thinking, "initial answer with more streamed text") + item.SetMessage(updated) + _ = item.RawRender(width) + + require.Equal(t, originalHeight, item.thinkingBoxHeight, + "thinkingBoxHeight must be preserved across thinking section cache hits "+ + "so HandleMouseClick keeps targeting the right rows") +} diff --git a/internal/ui/chat/assistant_test.go b/internal/ui/chat/assistant_test.go index 8ccbb403c30a7aff2448c082d012512ee06cdc18..5784eeff7a1daf5e2876ca7e520074b5642bf7e5 100644 --- a/internal/ui/chat/assistant_test.go +++ b/internal/ui/chat/assistant_test.go @@ -10,21 +10,108 @@ import ( ) // TestAssistantMessageItemExpandable guards the Expandable contract on -// AssistantMessageItem. The earlier implementation returned no value, which -// meant the type silently did not satisfy chat.Expandable and the -// keyboard-driven expand path in model/chat.go skipped thinking blocks. +// AssistantMessageItem along the keyboard-driven expand path. The earlier +// implementation returned no value, which meant the type silently did +// not satisfy chat.Expandable and the keyboard-driven expand path in +// model/chat.go skipped thinking blocks. +// +// We exercise the contract through the bare Expandable interface (the +// same dispatch site model.Chat.ToggleExpandedSelectedItem uses), which +// proves both that AssistantMessageItem still satisfies the interface +// and that the bool return reports the right semantic state at every +// point in the cycle. func TestAssistantMessageItemExpandable(t *testing.T) { t.Parallel() sty := styles.CharmtonePantera() - msg := &message.Message{ID: "m1", Role: message.Assistant} - item := NewAssistantMessageItem(&sty, msg) + // Short thinking: under the tail-window cap, so the cycle is + // collapsed -> full -> collapsed (tail-window is skipped). + msg := thinkingMessage("m1", "step one\nstep two\nstep three", "") + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) - exp, ok := item.(Expandable) + exp, ok := any(item).(Expandable) require.True(t, ok, "AssistantMessageItem must satisfy Expandable") - require.True(t, exp.ToggleExpanded(), "first toggle should report expanded") - require.False(t, exp.ToggleExpanded(), "second toggle should report collapsed") + require.Equal(t, thinkingCollapsed, item.thinkingViewMode, + "new items must start in the collapsed view-mode") + require.True(t, exp.ToggleExpanded(), + "first toggle of a non-empty thinking block must report expanded") + require.Equal(t, thinkingFullExpanded, item.thinkingViewMode, + "short blocks must skip tail-window and land in full expansion") + require.False(t, exp.ToggleExpanded(), + "second toggle must report collapsed (cycle closed)") + require.Equal(t, thinkingCollapsed, item.thinkingViewMode) +} + +// TestAssistantMessageItemExpandableEmptyThinkingNoOp guards the B2 +// fix: a message with no thinking text must treat ToggleExpanded as a +// no-op. Mutating the view mode in that case would thrash the +// thinking-section cache key for no visible benefit and would surprise +// the caller (model.Chat.ToggleExpandedSelectedItem would treat a +// "now collapsed" return as a real state change and re-scroll on it). +func TestAssistantMessageItemExpandableEmptyThinkingNoOp(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + msg := &message.Message{ID: "m1-empty", Role: message.Assistant} + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + + exp, ok := any(item).(Expandable) + require.True(t, ok, "AssistantMessageItem must satisfy Expandable") + + require.Equal(t, thinkingCollapsed, item.thinkingViewMode) + require.False(t, exp.ToggleExpanded(), + "empty thinking must report current (collapsed) state without flipping") + require.Equal(t, thinkingCollapsed, item.thinkingViewMode, + "empty-thinking toggle must not mutate thinkingViewMode") + + // Whitespace-only thinking is still effectively empty. + item.message.Parts = []message.ContentPart{ + message.ReasoningContent{Thinking: " \n\n\t ", StartedAt: testStartedAt}, + } + require.False(t, exp.ToggleExpanded()) + require.Equal(t, thinkingCollapsed, item.thinkingViewMode) +} + +// TestAssistantMessageItemTailWindowBoundary guards the B1 fix: the +// tail-window heuristic must compare logical line counts (1 + +// newline count) against the cap, not raw newline counts. A source +// whose logical line count exactly equals the cap must NOT trip the +// tail-window step (full render still fits cleanly under the cap), +// while one logical line over the cap must trip it. +func TestAssistantMessageItemTailWindowBoundary(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + + atCap := buildLines(maxExpandedThinkingTailLines) + overCap := buildLines(maxExpandedThinkingTailLines + 1) + + atItem := NewAssistantMessageItem(&sty, thinkingMessage("at-cap", atCap, "")).(*AssistantMessageItem) + require.False(t, atItem.tailWindowWouldTruncate(), + "a source with exactly N logical lines must not trip the tail-window step") + + overItem := NewAssistantMessageItem(&sty, thinkingMessage("over-cap", overCap, "")).(*AssistantMessageItem) + require.True(t, overItem.tailWindowWouldTruncate(), + "a source with N+1 logical lines must trip the tail-window step") +} + +// buildLines returns a string of n logical lines (n-1 newlines). Each +// line is a unique short token so callers can distinguish head from +// tail in rendered output if they need to. +func buildLines(n int) string { + if n <= 0 { + return "" + } + var b []byte + for i := 1; i <= n; i++ { + if i > 1 { + b = append(b, '\n') + } + b = append(b, 'l', 'n') + b = append(b, []byte(itoa(i))...) + } + return string(b) } // TestAssistantMessageItemHandleMouseClick ensures HandleMouseClick does not @@ -40,15 +127,16 @@ func TestAssistantMessageItemHandleMouseClick(t *testing.T) { item.thinkingBoxHeight = 5 // Click inside the thinking box signals handled but must not mutate - // the expanded state. + // the view-mode state. require.True(t, item.HandleMouseClick(ansi.MouseLeft, 0, 2)) - require.False(t, item.thinkingExpanded, "HandleMouseClick must not toggle expansion on its own") + require.Equal(t, thinkingCollapsed, item.thinkingViewMode, + "HandleMouseClick must not toggle expansion on its own") // Click outside the thinking box is ignored entirely. require.False(t, item.HandleMouseClick(ansi.MouseLeft, 0, 10)) - require.False(t, item.thinkingExpanded) + require.Equal(t, thinkingCollapsed, item.thinkingViewMode) // Non-left button is ignored. require.False(t, item.HandleMouseClick(ansi.MouseRight, 0, 2)) - require.False(t, item.thinkingExpanded) + require.Equal(t, thinkingCollapsed, item.thinkingViewMode) } diff --git a/internal/ui/chat/assistant_thinking_window_test.go b/internal/ui/chat/assistant_thinking_window_test.go new file mode 100644 index 0000000000000000000000000000000000000000..3b685847672eb6ff90f985bea5c9900856da6c93 --- /dev/null +++ b/internal/ui/chat/assistant_thinking_window_test.go @@ -0,0 +1,500 @@ +package chat + +import ( + "strings" + "testing" + + "charm.land/lipgloss/v2" + "github.com/charmbracelet/crush/internal/message" + "github.com/charmbracelet/crush/internal/ui/styles" + "github.com/charmbracelet/x/ansi" + "github.com/stretchr/testify/require" +) + +// thinkingMessageWithLines builds a still-thinking assistant message +// whose reasoning content is `count` short paragraphs separated by +// blank lines. The blank-line separation is what matters: glamour +// renders paragraph blocks one-per-line in the output (with a +// trailing blank line between paragraphs) instead of reflowing the +// entire input into one big wrapped paragraph. That gives us a +// post-glamour line count we can drive past the tail-window +// threshold deterministically. Each paragraph is tagged with its +// (1-based) index so the test can identify head vs tail in the +// rendered output. +// +// The message has no text content and no Finish part, so +// IsThinking() returns true and the render path skips the +// "Thought for" footer — keeping the rendered height computation +// simple. +func thinkingMessageWithLines(id string, count int) *message.Message { + var b strings.Builder + for i := 1; i <= count; i++ { + b.WriteString("ln") + b.WriteString(itoa(i)) + if i < count { + // Blank line between paragraphs: glamour preserves the + // per-paragraph structure rather than reflowing into one + // wrapped block, so totalLines tracks count predictably. + b.WriteString("\n\n") + } + } + return &message.Message{ + ID: id, + Role: message.Assistant, + Parts: []message.ContentPart{ + message.ReasoningContent{ + Thinking: b.String(), + StartedAt: testStartedAt, + }, + }, + } +} + +// itoa is a local stdlib-free integer formatter used only by these +// tests; pulling fmt in just for %d would be wasteful when the test +// fixtures already churn 5000+ short strings. +func itoa(n int) string { + if n == 0 { + return "0" + } + var buf [20]byte + i := len(buf) + for n > 0 { + i-- + buf[i] = byte('0' + n%10) + n /= 10 + } + return string(buf[i:]) +} + +// renderedThinkingHeight returns the line count of the cached +// thinking section render only (not the full RawRender, which also +// includes the content and error sections). Drives a render at +// `width` first to populate the cache. +func renderedThinkingHeight(t *testing.T, item *AssistantMessageItem, width int) int { + t.Helper() + _ = item.RawRender(width) + require.NotEmpty(t, item.thinkingSec.out, + "thinking section must be populated after RawRender") + return lipgloss.Height(item.thinkingSec.out) +} + +// TestThinkingWindow_CollapsedCapPreserved guards that F5 did not +// regress the existing collapsed-mode behaviour: a 5000-line +// thinking block in the default (collapsed) state still renders at +// most a small bounded height — the last `maxCollapsedThinkingHeight` +// lines plus the truncation hint. The thinking message keeps +// IsThinking() == true, so the optional "Thought for" footer is +// suppressed and the section height equals the box height. +func TestThinkingWindow_CollapsedCapPreserved(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + msg := thinkingMessageWithLines("collapsed", 5000) + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + + // Default state must be collapsed. + require.Equal(t, thinkingCollapsed, item.thinkingViewMode) + + // Unique odd width avoids sharing the glamour renderer cache with + // any other parallel test (the renderer instance is memoized per + // width and is not safe for concurrent Render calls). + const width = 91 + height := renderedThinkingHeight(t, item, width) + + // Collapsed mode keeps the existing cap: last 10 lines + a + // 2-line hint prefix (hint + blank). Allow a small slack for + // any future style-driven padding so the test is robust to + // cosmetic tweaks while still being orders of magnitude below + // the 5000-line source. + const collapsedUpperBound = maxCollapsedThinkingHeight + 5 + require.LessOrEqual(t, height, collapsedUpperBound, + "collapsed mode must remain bounded by the small cap; got %d", height) +} + +// TestThinkingWindow_ExpandedShortSkipsTailWindow guards that a +// short thinking block (well under the tail-window cap) still +// toggles directly to full expansion without an intermediate +// tail-window step and shows no affordance footer. The cycle is +// collapsed -> full -> collapsed for short blocks; tail-window is +// only inserted when it would actually elide content. +func TestThinkingWindow_ExpandedShortSkipsTailWindow(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + const lines = 50 + require.Less(t, lines, maxExpandedThinkingTailLines, + "this test relies on the source being well under the tail cap") + msg := thinkingMessageWithLines("short", lines) + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + + require.True(t, item.ToggleExpanded(), + "first toggle should report expanded") + require.Equal(t, thinkingFullExpanded, item.thinkingViewMode, + "short blocks must skip tail-window and go straight to full expansion") + + const width = 93 + _ = item.RawRender(width) + out := item.thinkingSec.out + plain := ansi.Strip(out) + + require.NotContains(t, plain, "earlier lines hidden", + "short blocks must not show the tail-window affordance") + require.NotContains(t, plain, "lines hidden", + "short expanded blocks must not show any truncation hint") + require.Contains(t, plain, "ln1 ", + "a fully expanded short block must include the very first source paragraph") + require.Contains(t, plain, "ln50 ", + "a fully expanded short block must include the last source paragraph") +} + +// TestThinkingWindow_TailWindowed asserts the central F5 behaviour: +// expanding a long thinking block produces a tail window of size +// `maxExpandedThinkingTailLines` plus the affordance footer, with +// the LAST source line present (i.e. we tailed, not headed) and +// earlier lines elided. +// +// Beyond presence/absence of sentinels, this test verifies a true +// `tail -K` relationship between the tail-windowed render and the +// fully-expanded render of the same source at the same width: the +// last K plain-ANSI lines of the windowed render must byte-equal +// the last K lines of the unwindowed render. +// +// K is sized below the cap to absorb the affordance prefix (hint + +// blank line) and any small framing differences introduced by the +// bordered ThinkingBox. The cap minus 5 leaves a comfortable margin +// for padding/footer rows while still asserting that the bulk of +// the rendered tail is identical. +func TestThinkingWindow_TailWindowed(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + const total = 5000 + const width = 95 + + // Tail-windowed render. + tailMsg := thinkingMessageWithLines("tail", total) + tailItem := NewAssistantMessageItem(&sty, tailMsg).(*AssistantMessageItem) + require.True(t, tailItem.ToggleExpanded(), "first toggle should report expanded") + require.Equal(t, thinkingTailWindow, tailItem.thinkingViewMode, + "a long block must enter tail-window after the first toggle") + + height := renderedThinkingHeight(t, tailItem, width) + + // The visible window is N tail lines plus an affordance line + // and a blank-line spacer (matching the existing collapsed-mode + // hint structure). Allow a small slack for style-driven + // padding. + const expectedFloor = maxExpandedThinkingTailLines + 1 + const expectedCeil = maxExpandedThinkingTailLines + 5 + require.GreaterOrEqual(t, height, expectedFloor, + "tail-window must include at least N + affordance lines; got %d", height) + require.LessOrEqual(t, height, expectedCeil, + "tail-window must not exceed N + a small padding budget; got %d", height) + + tailPlain := ansi.Strip(tailItem.thinkingSec.out) + + require.Contains(t, tailPlain, "earlier lines hidden", + "tail-windowed render must include the affordance footer") + require.Contains(t, tailPlain, "ln5000", + "tail-windowed render must include the LAST source paragraph — we tailed, not headed") + require.NotContains(t, tailPlain, "ln1 ", + "tail-windowed render must elide early source paragraphs") + + // Independent reference render: same source, same width, full + // expansion (no tail slice). The tail-windowed output's last K + // lines must byte-equal the unwindowed output's last K lines. + fullMsg := thinkingMessageWithLines("tail-full-ref", total) + fullItem := NewAssistantMessageItem(&sty, fullMsg).(*AssistantMessageItem) + fullItem.thinkingViewMode = thinkingFullExpanded + _ = fullItem.RawRender(width) + fullPlain := ansi.Strip(fullItem.thinkingSec.out) + + tailLines := strings.Split(tailPlain, "\n") + fullLines := strings.Split(fullPlain, "\n") + + // K is the cap minus a small budget that covers the affordance + // prefix (hint line + blank line) and any framing differences + // the bordered ThinkingBox style may introduce around the + // edges. Documented inline because going much larger lets the + // affordance row leak into the comparison; going much smaller + // dilutes the assertion. + const K = maxExpandedThinkingTailLines - 5 + require.GreaterOrEqual(t, len(tailLines), K, + "tail render must contain at least K lines; got %d", len(tailLines)) + require.GreaterOrEqual(t, len(fullLines), K, + "full render must contain at least K lines; got %d", len(fullLines)) + + tailTail := tailLines[len(tailLines)-K:] + fullTail := fullLines[len(fullLines)-K:] + require.Equal(t, fullTail, tailTail, + "tail-windowed render's last %d lines must byte-equal the unwindowed render's last %d lines (true tail -K relationship)", + K, K) +} + +// TestThinkingWindow_PromoteToFull verifies the cycle continues from +// tail-window to full expansion: the second toggle drops the +// affordance, removes the tail slice, and produces a render that +// matches a fresh item rendered directly in the full-expanded +// state. +func TestThinkingWindow_PromoteToFull(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + const total = 1500 + msg := thinkingMessageWithLines("promote", total) + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + + const width = 97 + + require.True(t, item.ToggleExpanded()) + require.Equal(t, thinkingTailWindow, item.thinkingViewMode) + _ = item.RawRender(width) + tailOut := item.thinkingSec.out + require.Contains(t, ansi.Strip(tailOut), "earlier lines hidden") + + require.True(t, item.ToggleExpanded(), "second toggle stays expanded (full)") + require.Equal(t, thinkingFullExpanded, item.thinkingViewMode) + _ = item.RawRender(width) + fullOut := item.thinkingSec.out + fullPlain := ansi.Strip(fullOut) + + require.NotContains(t, fullPlain, "earlier lines hidden", + "full expansion must drop the tail-window affordance") + require.Contains(t, fullPlain, "ln1 ", + "full expansion must include the first source paragraph") + require.Contains(t, fullPlain, "ln1500 ", + "full expansion must include the last source paragraph") + + // Independent reference: a fresh item, rendered straight into + // the full-expanded state, must produce byte-equal output. + freshMsg := thinkingMessageWithLines("promote-fresh", total) + fresh := NewAssistantMessageItem(&sty, freshMsg).(*AssistantMessageItem) + fresh.thinkingViewMode = thinkingFullExpanded + _ = fresh.RawRender(width) + require.Equal(t, fresh.thinkingSec.out, fullOut, + "cached full-expanded output must match a fresh full-expanded render") + + // And the cycle closes back to collapsed. + require.False(t, item.ToggleExpanded(), "third toggle must report collapsed") + require.Equal(t, thinkingCollapsed, item.thinkingViewMode) +} + +// sectionKey is the tuple that defines a cache-hit identity for an +// assistantSection: (width, srcHash, extra). Comparing this tuple +// across mutations is a stronger invariant than byte-equality of +// rendered output: byte-equality could in principle hold even if +// the cache invalidated and re-rendered identical bytes, while +// tuple-equality proves the lookup key never moved. +type sectionKey struct { + width int + srcHash uint64 + extra uint64 +} + +func keyOf(s assistantSection) sectionKey { + return sectionKey{width: s.width, srcHash: s.srcHash, extra: s.extra} +} + +// TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow +// guards the F4/F5 boundary: streaming the main content while the +// thinking block sits in tail-window mode must NOT invalidate the +// thinking section cache. Tail-window state is folded into +// thinkingKey()'s extra hash, so changing only the content text +// keeps thinking's (srcHash, extra) tuple identical and the cache +// hits. +// +// The assertion is on the cache key tuple, not just rendered bytes: +// equal output could in principle survive a re-render with +// identical inputs, but identical (width, srcHash, extra) tuples +// across the SetMessage cycle prove the thinking cache was never +// invalidated to begin with. The mirror tuple on the content +// section MUST move (the source text changed), or the test isn't +// exercising what it claims to. +func TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + const total = 1000 + + build := func(content string) *message.Message { + var b strings.Builder + for i := 1; i <= total; i++ { + b.WriteString("ln") + b.WriteString(itoa(i)) + if i < total { + b.WriteString("\n\n") + } + } + parts := []message.ContentPart{ + message.ReasoningContent{ + Thinking: b.String(), + StartedAt: testStartedAt, + FinishedAt: testFinishedAt, + }, + } + if content != "" { + parts = append(parts, message.TextContent{Text: content}) + } + return &message.Message{ID: "tail-stream", Role: message.Assistant, Parts: parts} + } + + item := NewAssistantMessageItem(&sty, build("first answer")).(*AssistantMessageItem) + item.thinkingViewMode = thinkingTailWindow + + const width = 99 + _ = item.RawRender(width) + first := snapshot(item) + firstThinkingKey := keyOf(item.thinkingSec) + firstContentKey := keyOf(item.contentSec) + require.NotEmpty(t, first.thinking) + + item.SetMessage(build("first answer with more streaming text")) + _ = item.RawRender(width) + second := snapshot(item) + secondThinkingKey := keyOf(item.thinkingSec) + secondContentKey := keyOf(item.contentSec) + + require.Equal(t, firstThinkingKey, secondThinkingKey, + "thinking section's (width, srcHash, extra) tuple must not move "+ + "across a content-only update — proves the cache key never invalidated") + require.Equal(t, first.thinking, second.thinking, + "content streaming must not invalidate the tail-windowed thinking cache") + require.NotEqual(t, firstContentKey, secondContentKey, + "content section's tuple MUST move; otherwise this test isn't exercising a real content change") + require.NotEqual(t, first.content, second.content, + "content section must have re-rendered") +} + +// TestThinkingWindow_ToggleInvalidatesOnlyThinking verifies that +// cycling thinkingViewMode invalidates the thinking section cache +// alone — content and error caches survive across the toggle. +// +// Like TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow, +// the assertion is on the cache key tuple (width, srcHash, extra) +// at each section, not just on rendered bytes: +// - thinking's tuple MUST move (extra folds in thinkingViewMode) +// - content's and error's tuples MUST NOT move (their keys depend +// only on their own source text, untouched by the toggle). +func TestThinkingWindow_ToggleInvalidatesOnlyThinking(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + const total = 1500 + build := func() *message.Message { + var b strings.Builder + for i := 1; i <= total; i++ { + b.WriteString("ln") + b.WriteString(itoa(i)) + if i < total { + b.WriteString("\n\n") + } + } + return &message.Message{ + ID: "toggle-iso", + Role: message.Assistant, + Parts: []message.ContentPart{ + message.ReasoningContent{ + Thinking: b.String(), + StartedAt: testStartedAt, + FinishedAt: testFinishedAt, + }, + message.TextContent{Text: "answer text"}, + message.Finish{ + Reason: message.FinishReasonError, + Message: "boom", + Details: "details", + Time: testFinishTime, + }, + }, + } + } + + item := NewAssistantMessageItem(&sty, build()).(*AssistantMessageItem) + + const width = 101 + _ = item.RawRender(width) + first := snapshot(item) + firstThink := keyOf(item.thinkingSec) + firstContent := keyOf(item.contentSec) + firstErr := keyOf(item.errorSec) + require.NotEmpty(t, first.thinking) + require.NotEmpty(t, first.content) + require.NotEmpty(t, first.errSec) + + // Cycle: collapsed -> tail-window. Only thinking should change. + require.True(t, item.ToggleExpanded()) + require.Equal(t, thinkingTailWindow, item.thinkingViewMode) + _ = item.RawRender(width) + second := snapshot(item) + secondThink := keyOf(item.thinkingSec) + secondContent := keyOf(item.contentSec) + secondErr := keyOf(item.errorSec) + + require.NotEqual(t, firstThink, secondThink, + "thinking section's tuple MUST move on toggle (extra folds in thinkingViewMode)") + require.Equal(t, firstContent, secondContent, + "content section's tuple must not move on a thinking toggle") + require.Equal(t, firstErr, secondErr, + "error section's tuple must not move on a thinking toggle") + require.NotEqual(t, first.thinking, second.thinking, + "toggling into tail-window must re-render the thinking section") + require.Equal(t, first.content, second.content, + "toggling thinking view-mode must not invalidate the content section") + require.Equal(t, first.errSec, second.errSec, + "toggling thinking view-mode must not invalidate the error section") + + // Cycle: tail-window -> full. Same expectation. + require.True(t, item.ToggleExpanded()) + require.Equal(t, thinkingFullExpanded, item.thinkingViewMode) + _ = item.RawRender(width) + third := snapshot(item) + thirdThink := keyOf(item.thinkingSec) + thirdContent := keyOf(item.contentSec) + thirdErr := keyOf(item.errorSec) + + require.NotEqual(t, secondThink, thirdThink, + "thinking section's tuple MUST move on the second toggle as well") + require.Equal(t, secondContent, thirdContent, + "content section's tuple must remain stable across the second toggle") + require.Equal(t, secondErr, thirdErr, + "error section's tuple must remain stable across the second toggle") + require.NotEqual(t, second.thinking, third.thinking, + "toggling into full expansion must re-render the thinking section") + require.Equal(t, second.content, third.content) + require.Equal(t, second.errSec, third.errSec) +} + +// TestThinkingWindow_BoxHeightTracksWindow asserts that +// thinkingBoxHeight reflects the WINDOWED render's height in +// tail-window mode, not the (much larger) full thinking height. +// This is what HandleMouseClick uses to detect whether a click +// landed on the thinking box, so getting it wrong would make +// click detection extend off the bottom of the visible box. +func TestThinkingWindow_BoxHeightTracksWindow(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + const total = 5000 + msg := thinkingMessageWithLines("box-height", total) + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + + const width = 103 + + // Tail-window: height should be roughly the cap. + item.thinkingViewMode = thinkingTailWindow + _ = item.RawRender(width) + tailHeight := item.thinkingBoxHeight + require.Greater(t, tailHeight, 0) + require.LessOrEqual(t, tailHeight, maxExpandedThinkingTailLines+5, + "tail-window box height must reflect the windowed render, not the full thinking height; got %d", + tailHeight) + + // Full expansion: height should grow well past the tail cap. + item.thinkingViewMode = thinkingFullExpanded + _ = item.RawRender(width) + fullHeight := item.thinkingBoxHeight + require.Greater(t, fullHeight, maxExpandedThinkingTailLines*2, + "full expansion box height must reflect the full thinking render; got %d", + fullHeight) +} diff --git a/internal/ui/chat/incremental_glamour_test.go b/internal/ui/chat/incremental_glamour_test.go new file mode 100644 index 0000000000000000000000000000000000000000..7f6e7ac7380bcfee53dd35edb84387f2cd74973b --- /dev/null +++ b/internal/ui/chat/incremental_glamour_test.go @@ -0,0 +1,853 @@ +package chat + +import ( + "strings" + "testing" + + "charm.land/glamour/v2" + "github.com/charmbracelet/crush/internal/ui/styles" + "github.com/stretchr/testify/require" +) + +// newTestRenderer builds a fresh glamour renderer for the given +// width. We deliberately do NOT share renderers between calls in +// the equivalence tests so any hidden state in +// [glamour.TermRenderer] cannot leak from a "cached" rendering +// path into a "fresh" rendering path. +func newTestRenderer(t *testing.T, width int) *glamour.TermRenderer { + t.Helper() + sty := styles.CharmtonePantera() + r, err := glamour.NewTermRenderer( + glamour.WithStyles(sty.Markdown), + glamour.WithWordWrap(width), + ) + require.NoError(t, err) + return r +} + +// freshRender renders content as a single document with a fresh +// glamour renderer and applies the same trailing-newline trim +// that streamingMarkdown.Render does. Use this for byte- and +// visible-equivalence comparisons against the streaming path. +func freshRender(t *testing.T, content string, width int) string { + t.Helper() + r := newTestRenderer(t, width) + out, err := r.Render(content) + require.NoError(t, err) + return strings.TrimSuffix(out, "\n") +} + +// stripANSI removes all ANSI CSI escape sequences from s so two +// renders with different colour state can be compared on their +// visible glyphs alone. +func stripANSI(s string) string { + var b strings.Builder + b.Grow(len(s)) + i := 0 + for i < len(s) { + if s[i] == 0x1b && i+1 < len(s) && s[i+1] == '[' { + j := i + 2 + for j < len(s) { + c := s[j] + if c >= 0x40 && c <= 0x7e { + j++ + break + } + j++ + } + i = j + continue + } + b.WriteByte(s[i]) + i++ + } + return b.String() +} + +// normalizeRender canonicalises a rendered glamour string for +// visual-equivalence comparison: strip ANSI, drop per-line +// trailing whitespace, drop leading/trailing blank lines, and +// collapse consecutive blank lines to a single blank line. +// +// Glamour pads rendered lines with trailing spaces and adds top/ +// bottom block margins that differ subtly between "render the +// whole document at once" and "render two halves and concatenate +// them." Per F8 design principle D, those byte-level differences +// are acceptable as long as the visible content matches; this +// helper makes that comparison explicit. +func normalizeRender(s string) string { + clean := stripANSI(s) + lines := strings.Split(clean, "\n") + for i, l := range lines { + lines[i] = strings.TrimRight(l, " \t") + } + // Collapse consecutive blank lines. + out := make([]string, 0, len(lines)) + prevBlank := false + for _, l := range lines { + blank := l == "" + if blank && prevBlank { + continue + } + out = append(out, l) + prevBlank = blank + } + // Trim leading and trailing blanks. + for len(out) > 0 && out[0] == "" { + out = out[1:] + } + for len(out) > 0 && out[len(out)-1] == "" { + out = out[:len(out)-1] + } + return strings.Join(out, "\n") +} + +// containsRawMarkdownSource reports whether the visible portion of +// rendered contains literal markdown source markers that should +// have been consumed by glamour. Used by T2 to assert that +// intermediate streaming flushes don't leak raw source through to +// the user. We deliberately only flag markers that glamour +// removes during rendering ("```" fence delimiters, "|" table +// pipes embedded in a line that also contains pipes — actual +// table syntax — and bare "###" headers); pipes-in-prose and +// dashes are too common to flag. +func containsRawMarkdownSource(rendered string) bool { + clean := stripANSI(rendered) + if strings.Contains(clean, "```") { + return true + } + for _, line := range strings.Split(clean, "\n") { + if strings.HasPrefix(strings.TrimLeft(line, " \t"), "###") { + return true + } + } + return false +} + +// ----------------------------------------------------------------------- +// T1: findSafeMarkdownBoundary unit tests. +// ----------------------------------------------------------------------- + +// TestFindSafeMarkdownBoundary_TableDriven exercises the +// findSafeMarkdownBoundary decision tree across the full set of +// constructs §4.4 calls out: plain paragraphs, fenced code (open +// and closed), lists, tables, block quotes, and setext headers. +func TestFindSafeMarkdownBoundary_TableDriven(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + content string + // want is the expected boundary; -1 means "no safe + // boundary." When >=0 the test asserts content[:want] + // ends after a blank-line separator and content[:want] + // is a complete prefix. + want int + }{ + { + name: "empty", + content: "", + want: -1, + }, + { + name: "single line", + content: "Just a single paragraph", + want: -1, + }, + { + name: "two paragraphs", + content: "First paragraph.\n\nSecond paragraph.", + // boundary at start of "Second" + want: len("First paragraph.\n\n"), + }, + { + name: "three paragraphs picks latest", + content: "First.\n\nSecond.\n\nThird.", + want: len("First.\n\nSecond.\n\n"), + }, + { + name: "open fence at end", + content: "Para.\n\n```go\nfoo()\n", + // no closing fence — every blank-line candidate + // before content end is INSIDE the fence (the open + // fence opened at offset 7). Actually the ONLY + // blank line is between "Para." and "```go", so + // candidate boundary is right before "```go". At + // that point fence count = 0, even, but the line + // AFTER (the first non-blank) is "```go" which + // would change rendering of the prefix… hmm, + // actually it wouldn't change the prefix's + // rendering because the prefix is just "Para.\n\n". + // The boundary would be ACCEPTED. Let's check + // what our impl does. + want: len("Para.\n\n"), + }, + { + name: "inside open fence: no candidate after open", + content: "Para.\n\n```go\nfoo()\n\nbar()\n", + // blank line after "foo()" is INSIDE the fence + // (fence count at that prefix = 1, odd), must + // reject. The earlier blank line between "Para." + // and "```go" should still be safe (fence count + // at that prefix = 0). + want: len("Para.\n\n"), + }, + { + name: "closed fence followed by paragraph", + content: "Para1.\n\n```\nfoo()\n```\n\nPara2.", + // latest blank line is between "```" and "Para2."; + // fence count at that prefix = 2 (even), last + // non-blank line is "```" which is not a list/ + // table/quote/setext. + want: len("Para1.\n\n```\nfoo()\n```\n\n"), + }, + { + name: "open list at end", + content: "Para.\n\n- one\n- two\n", + // last non-blank line of any blank-bounded prefix + // is a list item; our boundary check rejects. + // The blank line between "Para." and "- one" is + // the only candidate, but the line AFTER (first + // non-blank of suffix) is "- one" — that's fine, + // a list opening doesn't change the prefix's + // rendering. So the boundary BEFORE the list is + // accepted. + want: len("Para.\n\n"), + }, + { + name: "list interior: no boundary", + content: "- one\n- two\n", + // no blank line at all. + want: -1, + }, + { + name: "closed list then paragraph", + content: "- one\n- two\n\nPara.", + // blank line after the list. Last non-blank line + // of prefix is "- two" — a list item — so the + // candidate is REJECTED. (Conservative: we don't + // know the list is "closed" without looking at + // what follows.) + want: -1, + }, + { + name: "table at end", + content: "Para.\n\n| a | b |\n| --- | --- |\n| 1 | 2 |\n", + // blank-line candidate is between "Para." and + // table opener. Last non-blank line of prefix is + // "Para." — fine. Line AFTER is "| a | b |" + // which is a table line; doesn't retroactively + // change "Para." Boundary accepted. + want: len("Para.\n\n"), + }, + { + name: "table interior with internal blank line: no late boundary", + content: "| a | b |\n| --- | --- |\n\n| 1 | 2 |\n", + // the blank line in the middle is followed by + // another table line. Last non-blank line of + // prefix is "| --- | --- |" which contains a + // pipe — we reject. + want: -1, + }, + { + name: "block quote at end", + content: "Para.\n\n> quoted\n> still quoted\n", + // Last non-blank line of any prefix that ends + // inside the quote block is a "> ..." line — + // rejected. The blank line BEFORE the quote + // gives a prefix of "Para.\n\n" — last non-blank + // "Para." — accepted. + want: len("Para.\n\n"), + }, + { + name: "setext underline pending", + content: "Heading\n\n=====\n", + // blank line between "Heading" and "=====". + // Prefix = "Heading\n\n", last non-blank "Heading" + // — fine. But the FIRST non-blank line of the + // suffix is "=====", a setext-underline + // candidate. Splitting here would render the + // prefix as a paragraph "Heading", but the + // canonical render would treat the whole thing + // as a setext header. Reject. + // + // (Note: per CommonMark, a blank line between a + // paragraph and an underline actually breaks the + // setext, so the setext interpretation may not + // apply. But the boundary check is conservative + // — being wrong costs one slow frame, being + // over-aggressive costs visible breakage.) + want: -1, + }, + { + name: "indented code at end of prefix", + content: "Para.\n\n code line\n\nNext.", + // prefix candidates: + // "Para.\n\n" — last non-blank "Para.", accepted + // "Para.\n\n code line\n\n" — last non-blank + // is " code line" which is indented 4 + // spaces — REJECTED. + // Latest accepted is the first. + want: len("Para.\n\n"), + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + t.Parallel() + got := findSafeMarkdownBoundary(c.content) + require.Equalf(t, c.want, got, + "findSafeMarkdownBoundary(%q) = %d, want %d", c.content, got, c.want) + if got > 0 { + // Boundary must point to the start of a line + // (i.e. just after a newline) when the prefix + // is non-empty. + require.True(t, got <= len(c.content), + "boundary %d out of range (len=%d)", got, len(c.content)) + if got > 0 && got <= len(c.content) { + require.Equal(t, byte('\n'), c.content[got-1], + "boundary %d does not sit immediately after a newline", got) + } + } + }) + } +} + +// ----------------------------------------------------------------------- +// T2: streaming-equivalence tests. +// ----------------------------------------------------------------------- + +// streamingScenarios returns the four canonical document shapes +// that exercise different boundary-detection paths. +func streamingScenarios() []struct { + name string + doc string +} { + return []struct { + name string + doc string + }{ + { + name: "plain-paragraphs", + doc: strings.Join([]string{ + "This is the first paragraph of the document.", + "", + "Here is the second paragraph; it has some words.", + "", + "And a third paragraph for good measure.", + "", + "Finally a fourth paragraph to push past one boundary.", + }, "\n"), + }, + { + name: "paragraphs-with-fence", + doc: strings.Join([]string{ + "Intro paragraph.", + "", + "Some explanatory prose before the code.", + "", + "```go", + "func hello() {", + "\tfmt.Println(\"hi\")", + "}", + "```", + "", + "And a closing paragraph after the code block.", + }, "\n"), + }, + { + name: "paragraphs-with-list", + doc: strings.Join([]string{ + "Intro paragraph.", + "", + "- list item one", + "- list item two", + "- list item three", + "", + "Trailing paragraph.", + }, "\n"), + }, + { + name: "paragraphs-with-table", + doc: strings.Join([]string{ + "Intro paragraph.", + "", + "| col a | col b |", + "| ----- | ----- |", + "| 1 | 2 |", + "| 3 | 4 |", + "", + "Trailing paragraph after the table.", + }, "\n"), + }, + } +} + +// progressivePrefixes splits doc into n monotonically growing +// byte prefixes, ending with the full document. n>=1. +func progressivePrefixes(doc string, n int) []string { + if n < 1 { + n = 1 + } + out := make([]string, 0, n) + for i := 1; i <= n; i++ { + // integer scaling so the last entry is exactly len(doc) + size := len(doc) * i / n + if i == n { + size = len(doc) + } + out = append(out, doc[:size]) + } + return out +} + +// TestStreamingMarkdown_FinalVisuallyEquivalent drives a sequence +// of progressive prefixes through streamingMarkdown and asserts +// the FINAL output is visually equivalent (per design principle +// D) to a fresh full-document render. Strict byte-equality is +// not the bar — see the comment in normalizeRender for why. +func TestStreamingMarkdown_FinalVisuallyEquivalent(t *testing.T) { + t.Parallel() + + const width = 80 + const steps = 15 + + for _, sc := range streamingScenarios() { + t.Run(sc.name, func(t *testing.T) { + t.Parallel() + renderer := newTestRenderer(t, width) + var sm streamingMarkdown + prefixes := progressivePrefixes(sc.doc, steps) + + var lastOut string + for _, p := range prefixes { + lastOut = sm.Render(p, width, renderer) + } + + fresh := freshRender(t, sc.doc, width) + require.Equal(t, normalizeRender(fresh), normalizeRender(lastOut), + "final streaming output must match a fresh full render visually") + }) + } +} + +// TestStreamingMarkdown_IntermediateOutputsPlausible asserts that +// every intermediate flush returns a non-empty string and does +// not leak raw markdown source through to the user. This is the +// "visually plausible" half of T2. +func TestStreamingMarkdown_IntermediateOutputsPlausible(t *testing.T) { + t.Parallel() + + const width = 80 + const steps = 12 + + for _, sc := range streamingScenarios() { + t.Run(sc.name, func(t *testing.T) { + t.Parallel() + renderer := newTestRenderer(t, width) + var sm streamingMarkdown + + for i, p := range progressivePrefixes(sc.doc, steps) { + if p == "" { + continue + } + out := sm.Render(p, width, renderer) + require.NotEmptyf(t, out, "step %d: empty render for prefix len %d", i, len(p)) + require.Falsef(t, containsRawMarkdownSource(out), + "step %d: render leaked raw markdown source.\nprefix=%q\nout=%s", + i, p, normalizeRender(out)) + } + }) + } +} + +// ----------------------------------------------------------------------- +// T3: cache invalidation tests. +// ----------------------------------------------------------------------- + +// TestStreamingMarkdown_WidthChangeInvalidates asserts that a +// width change blows away the cached prefix so the next render +// is keyed against the new width. We can't observe the cache +// directly without reaching into the struct, so we assert the +// observable contract: after a width change, the rendered output +// reflects the new width AND the streamingMarkdown's internal +// cache fields are reset to the new state. +func TestStreamingMarkdown_WidthChangeInvalidates(t *testing.T) { + t.Parallel() + + doc := "Para one.\n\nPara two.\n\nPara three." + r80 := newTestRenderer(t, 80) + r40 := newTestRenderer(t, 40) + var sm streamingMarkdown + + out80 := sm.Render(doc, 80, r80) + require.Equal(t, 80, sm.width, "width must be cached after first render") + cachedPrefix := sm.stablePrefix + + out40 := sm.Render(doc, 40, r40) + require.Equal(t, 40, sm.width, "width change must update cached width") + require.NotEqual(t, out80, out40, + "different widths must produce different rendered output") + // stablePrefix may legitimately have re-advanced after the + // reset (tryAdvanceFromEmpty), but if it has, it can no + // longer carry the OLD width's render. We assert the cache + // reset by checking that the cached prefix length is at + // most the current content length. + require.True(t, len(sm.stablePrefix) <= len(doc), + "stable prefix must be a prefix of the current content") + _ = cachedPrefix +} + +// TestStreamingMarkdown_NonPrefixContentInvalidates verifies +// that content which is NOT a prefix-extension of the cached +// stable prefix triggers a Reset and a fresh render path. This +// guards the "user retried the turn" case. +func TestStreamingMarkdown_NonPrefixContentInvalidates(t *testing.T) { + t.Parallel() + + const width = 80 + r := newTestRenderer(t, width) + var sm streamingMarkdown + + // Drive a streaming sequence so the cache picks up a stable + // prefix. + doc := "Para one.\n\nPara two.\n\nPara three." + for _, p := range progressivePrefixes(doc, 6) { + _ = sm.Render(p, width, r) + } + require.NotEmpty(t, sm.stablePrefix, + "stable prefix must be populated after streaming a multi-paragraph doc") + + // Now switch to entirely different content (user retried). + other := "Completely different opening paragraph.\n\nAnd a second." + out := sm.Render(other, width, r) + require.NotEmpty(t, out) + // stablePrefix must be a prefix of `other`, i.e. cache was + // reset off the OLD content. + require.True(t, strings.HasPrefix(other, sm.stablePrefix), + "stable prefix must be reset to a prefix of the new content") + + // Visual equivalence to a fresh render of `other`. + fresh := freshRender(t, other, width) + require.Equal(t, normalizeRender(fresh), normalizeRender(out), + "render after non-prefix content change must match a fresh render") +} + +// TestStreamingMarkdown_ResetClearsCache asserts Reset() drops +// every cached field; the next render is necessarily a full +// render path. +func TestStreamingMarkdown_ResetClearsCache(t *testing.T) { + t.Parallel() + + const width = 80 + r := newTestRenderer(t, width) + var sm streamingMarkdown + + doc := "Para one.\n\nPara two.\n\nPara three." + _ = sm.Render(doc, width, r) + // The sample doc has safe boundaries so the cache should + // have advanced. If for some reason it didn't, we still + // want Reset to be a no-op-safe operation; assert the + // post-Reset state directly. + sm.Reset() + require.Equal(t, 0, sm.width) + require.Equal(t, "", sm.stablePrefix) + require.Equal(t, "", sm.stablePrefixRender) + + // Next render must be a full render path. Drive one step + // and verify the output matches a fresh full render. + out := sm.Render(doc, width, r) + fresh := freshRender(t, doc, width) + require.Equal(t, normalizeRender(fresh), normalizeRender(out)) +} + +// ----------------------------------------------------------------------- +// T4: fallback safety. +// ----------------------------------------------------------------------- + +// TestStreamingMarkdown_NoSafeBoundaryAlwaysFullRenders covers +// the "one giant table being built character by character" case. +// Every flush must fall back to a full render; the cache must +// not advance into an unsafe state. We compare each flush to a +// fresh full render of the same prefix; bytes must match for +// each prefix individually. +// +// (Byte equality is sound here because no concatenation happens: +// the streaming path delegates straight to renderer.Render when +// the cache is empty and no safe boundary exists.) +func TestStreamingMarkdown_NoSafeBoundaryAlwaysFullRenders(t *testing.T) { + t.Parallel() + + const width = 80 + + // One growing table — no blank lines anywhere, so no + // boundary candidate is ever found. + doc := strings.Join([]string{ + "| col a | col b | col c |", + "| ----- | ----- | ----- |", + "| 1 | 2 | 3 |", + "| 4 | 5 | 6 |", + "| 7 | 8 | 9 |", + "| 10 | 11 | 12 |", + "| 13 | 14 | 15 |", + "| 16 | 17 | 18 |", + "| 19 | 20 | 21 |", + "| 22 | 23 | 24 |", + }, "\n") + require.Equal(t, -1, findSafeMarkdownBoundary(doc), + "sanity check: no blank lines, no safe boundary") + + r := newTestRenderer(t, width) + var sm streamingMarkdown + + prefixes := progressivePrefixes(doc, 10) + for i, p := range prefixes { + if p == "" { + continue + } + out := sm.Render(p, width, r) + fresh := freshRender(t, p, width) + require.Equalf(t, fresh, out, + "step %d (len=%d): streaming output must byte-equal a fresh render when boundary detection fails", + i, len(p)) + } + // Cache must remain empty: no boundary was ever found, no + // width change occurred, no advance ever cached anything. + require.Equal(t, "", sm.stablePrefix, + "stable prefix must remain empty when no safe boundary ever exists") +} + +// TestStreamingMarkdown_NoSafeBoundaryDoesNotCrash is the +// minimum-viability assertion of T4: even when boundary +// detection fails on every flush the streaming path must not +// crash and must produce non-empty output for non-empty input. +func TestStreamingMarkdown_NoSafeBoundaryDoesNotCrash(t *testing.T) { + t.Parallel() + + const width = 80 + r := newTestRenderer(t, width) + var sm streamingMarkdown + + // A deeply-pathological input: a single line that grows + // one character at a time. There is never a blank-line + // separator so the cache is never advanced. + src := "The quick brown fox jumps over the lazy dog." + for i := 1; i <= len(src); i++ { + out := sm.Render(src[:i], width, r) + require.NotEmpty(t, out, "streaming output must not be empty for non-empty input") + } +} + +// ----------------------------------------------------------------------- +// Integration assertions on the wired-in path. +// ----------------------------------------------------------------------- + +// ----------------------------------------------------------------------- +// T5 / T6 / T7: anywhere-in-prefix hazards (B1 / B2 / B3 from the +// F8 round-2 review). For each hazard we drive every progressive +// prefix of a document that exercises the hazard through the cache +// and assert two contracts: +// +// 1. The cached stable prefix never contains the hazard. If the +// hazard line is at byte offset H, then after every flush +// len(sm.stablePrefix) <= H. This is the "no silent +// corruption" half — the algorithm cannot accept a boundary +// that splits across the hazard. +// +// 2. The final flush is visually equivalent to a fresh full +// render of the complete document. This is the same T2-style +// equivalence assertion ported to the new doc shapes. +// ----------------------------------------------------------------------- + +// nonBlankLines returns the non-blank visible lines of s with +// per-line trailing whitespace trimmed. Used to compare two +// rendered fragments for content equivalence when paragraph- +// margin behaviour legitimately differs between a single fresh +// render and a streaming split render (per F8 design principle D +// — visual equivalence is the bar, byte-equivalence is not). +// +// Some glamour block types (notably HTML blocks and reference +// link definitions) interact with adjacent paragraph blocks +// during a single render — adjacency effectively suppresses the +// blank-line margin between blocks. When the streaming path +// renders the prefix and trail in separate calls, the seam is +// re-introduced as a blank line. The visible TEXT is identical; +// only the inter-block margin differs. +func nonBlankLines(s string) []string { + clean := stripANSI(s) + out := make([]string, 0) + for _, l := range strings.Split(clean, "\n") { + l = strings.TrimRight(l, " \t") + if strings.TrimSpace(l) == "" { + continue + } + out = append(out, l) + } + return out +} + +// runProgressiveBoundaryRespectTest is the shared body of T5/T6/T7. +// It accepts a document and the byte offset of the line whose +// PRESENCE in the prefix must trigger the hazard reject; the +// cached stable prefix may never extend past hazardLineOffset. +// +// The final-output equivalence check is content-based (non-blank +// lines compared) rather than full-normalization: see +// nonBlankLines for the reason. +func runProgressiveBoundaryRespectTest(t *testing.T, doc string, hazardLineOffset int) { + t.Helper() + const width = 80 + const steps = 25 + + renderer := newTestRenderer(t, width) + var sm streamingMarkdown + + prefixes := progressivePrefixes(doc, steps) + var lastOut string + for i, p := range prefixes { + if p == "" { + continue + } + lastOut = sm.Render(p, width, renderer) + require.NotEmptyf(t, lastOut, "step %d: empty render", i) + require.LessOrEqualf(t, len(sm.stablePrefix), hazardLineOffset, + "step %d: cached stable prefix advanced past the hazard line\n"+ + "prefix len=%d, hazard at %d, sm.stablePrefix=%q", + i, len(sm.stablePrefix), hazardLineOffset, sm.stablePrefix) + } + + fresh := freshRender(t, doc, width) + require.Equal(t, nonBlankLines(fresh), nonBlankLines(lastOut), + "final streaming output must contain the same non-blank lines as a fresh full render") +} + +// TestStreamingMarkdown_LooseListContinuation locks in the B1 fix. +// A loose list followed by a continuation paragraph and then a +// trailing paragraph creates a candidate boundary between the list +// item and its continuation; the trailing non-blank line of that +// candidate prefix is the continuation paragraph (not a list +// marker), so the line-only check would accept it. The +// anywhere-in-prefix list-marker check rejects it. +func TestStreamingMarkdown_LooseListContinuation(t *testing.T) { + t.Parallel() + + doc := strings.Join([]string{ + "Intro paragraph.", + "", + "- item one", + "", + " continuation paragraph still belongs to item one", + "", + "- item two", + "", + "Trailing paragraph after the list.", + }, "\n") + + // The first list marker line begins after "Intro paragraph.\n\n". + // The cached stable prefix may include that boundary (BEFORE + // the list opens) but must never advance into the list. + hazardOffset := strings.Index(doc, "- item one") + require.Greater(t, hazardOffset, 0, "test setup") + + runProgressiveBoundaryRespectTest(t, doc, hazardOffset) +} + +// TestStreamingMarkdown_HTMLBlock locks in the B2 fix. A raw HTML +// block followed by a paragraph creates a candidate boundary +// between the closed HTML block and the trailing paragraph. The +// anywhere-in-prefix HTML-opener check rejects any boundary that +// would include the HTML block in the stable prefix. +func TestStreamingMarkdown_HTMLBlock(t *testing.T) { + t.Parallel() + + doc := strings.Join([]string{ + "Intro paragraph.", + "", + "
", + "some block content", + "
", + "", + "Trailing paragraph after the HTML block.", + }, "\n") + + hazardOffset := strings.Index(doc, "
") + require.Greater(t, hazardOffset, 0, "test setup") + + runProgressiveBoundaryRespectTest(t, doc, hazardOffset) +} + +// TestStreamingMarkdown_HTMLBlockType7 covers HTML block type 7 +// (CommonMark): a generic open/close tag whose name is NOT in the +// fixed type-6 set still opens an HTML block and must forfeit any +// boundary that would split the block off from following content. +func TestStreamingMarkdown_HTMLBlockType7(t *testing.T) { + t.Parallel() + + doc := strings.Join([]string{ + "Intro paragraph.", + "", + "", + "some block content", + "", + "", + "Trailing paragraph after the custom-tag block.", + }, "\n") + + hazardOffset := strings.Index(doc, "") + require.Greater(t, hazardOffset, 0, "test setup") + + runProgressiveBoundaryRespectTest(t, doc, hazardOffset) +} + +// TestStreamingMarkdown_LinkRefDefinition locks in the B3 fix. A +// reference link definition followed by a paragraph that uses the +// reference creates a boundary candidate between the def and the +// paragraph; rendering them in separate glamour passes loses the +// definition. The anywhere-in-prefix ref-def check rejects. +func TestStreamingMarkdown_LinkRefDefinition(t *testing.T) { + t.Parallel() + + doc := strings.Join([]string{ + "Intro paragraph.", + "", + "[ref]: http://example.com", + "", + "Trailing paragraph that links to [the example][ref] inline.", + }, "\n") + + hazardOffset := strings.Index(doc, "[ref]:") + require.Greater(t, hazardOffset, 0, "test setup") + + runProgressiveBoundaryRespectTest(t, doc, hazardOffset) +} + +// TestAssistantStreamingContent_ResetOnClearCache guards the +// integration contract that ClearItemCaches (style change) drops +// the streaming-markdown cache. Without this, a style change +// would leave the OLD style's ANSI sequences embedded in the +// stable-prefix render and the next flush would visually mix +// styles. +func TestAssistantStreamingContent_ResetOnClearCache(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + doc := "Para one.\n\nPara two.\n\nPara three." + msg := finishedAssistantMessage("stream-clear", doc) + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + + const width = 80 + _ = item.RawRender(width) + // Drive a second message that extends the content so the + // streaming cache has a chance to advance (if it would). + doc2 := doc + "\n\nFour." + item.SetMessage(finishedAssistantMessage("stream-clear", doc2)) + _ = item.RawRender(width) + + // Now wipe the caches the way ClearItemCaches does. + item.clearCache() + + require.Equal(t, "", item.streamingContent.stablePrefix, + "clearCache must Reset the streaming-markdown cache") + require.Equal(t, "", item.streamingContent.stablePrefixRender) + require.Equal(t, 0, item.streamingContent.width) +} diff --git a/internal/ui/chat/messages.go b/internal/ui/chat/messages.go index 6c86cb74951e0bf8f20fe7af4fa420b4527936ca..1e83ec6f6797a866f3587ad6f349bb4c760b1eb3 100644 --- a/internal/ui/chat/messages.go +++ b/internal/ui/chat/messages.go @@ -75,6 +75,12 @@ type SendMsg struct { } type highlightableMessageItem struct { + // version is the parent item's version counter. SetHighlight + // bumps it on every observable change so the F6 list memo and + // any frozen entry get invalidated when a selection drag enters + // or leaves the item. + version *list.Versioned + startLine int startCol int endLine int @@ -103,13 +109,20 @@ func (h *highlightableMessageItem) SetHighlight(startLine int, startCol int, end // Adjust columns for the style's left inset (border + padding) since we // highlight the content only. offset := MessageLeftPaddingTotal + newStartCol := max(0, startCol-offset) + newEndCol := endCol + if endCol >= 0 { + newEndCol = max(0, endCol-offset) + } + if h.startLine == startLine && h.startCol == newStartCol && h.endLine == endLine && h.endCol == newEndCol { + return + } h.startLine = startLine - h.startCol = max(0, startCol-offset) + h.startCol = newStartCol h.endLine = endLine - if endCol >= 0 { - h.endCol = max(0, endCol-offset) - } else { - h.endCol = endCol + h.endCol = newEndCol + if h.version != nil { + h.version.Bump() } } @@ -118,8 +131,9 @@ func (h *highlightableMessageItem) Highlight() (startLine int, startCol int, end return h.startLine, h.startCol, h.endLine, h.endCol } -func defaultHighlighter(sty *styles.Styles) *highlightableMessageItem { +func defaultHighlighter(sty *styles.Styles, v *list.Versioned) *highlightableMessageItem { return &highlightableMessageItem{ + version: v, startLine: -1, startCol: -1, endLine: -1, @@ -135,12 +149,17 @@ type cacheClearable interface { } // ClearItemCaches drops any cached rendered output on each item so the -// next render uses the current styles. +// next render uses the current styles. It also bumps each item's +// version so the F6 list-level memo invalidates frozen entries on +// the next render. func ClearItemCaches(items []MessageItem) { for _, item := range items { if cc, ok := item.(cacheClearable); ok { cc.clearCache() } + if v, ok := item.(interface{ Bump() }); ok { + v.Bump() + } } } @@ -156,6 +175,20 @@ type cachedMessageItem struct { // width and height are the dimensions of the cached render width int height int + + // prefixedRendered caches the per-line-prefixed Render output (the + // result of splitting RawRender by newlines and prepending a focus + // or selection prefix to every line). Items rebuild this every + // frame today; caching it keyed by (prefixedWidth, prefixedKey) + // turns Render into a pointer return when item state is stable. + // + // Invalidation lives in clearCache; callers must additionally + // bypass this cache whenever the prefixed output would not be + // stable (spinner ticks, active highlight ranges) by not calling + // setCachedPrefixedRender for those frames. + prefixedRendered string + prefixedWidth int + prefixedKey uint64 } // getCachedRender returns the cached render if it exists for the given width. @@ -173,21 +206,57 @@ func (c *cachedMessageItem) setCachedRender(rendered string, width, height int) c.height = height } +// getCachedPrefixedRender returns the cached prefixed render if it exists +// for the given (width, key). The key encodes any state that changes the +// per-line prefix (focused/blurred, compact, ...). +func (c *cachedMessageItem) getCachedPrefixedRender(width int, key uint64) (string, bool) { + if c.prefixedRendered != "" && c.prefixedWidth == width && c.prefixedKey == key { + return c.prefixedRendered, true + } + return "", false +} + +// setCachedPrefixedRender stores the cached prefixed render. +func (c *cachedMessageItem) setCachedPrefixedRender(rendered string, width int, key uint64) { + c.prefixedRendered = rendered + c.prefixedWidth = width + c.prefixedKey = key +} + // clearCache clears the cached render. func (c *cachedMessageItem) clearCache() { c.rendered = "" c.width = 0 c.height = 0 + c.prefixedRendered = "" + c.prefixedWidth = 0 + c.prefixedKey = 0 } // focusableMessageItem is a base struct for message items that can be focused. type focusableMessageItem struct { + // version is the parent item's version counter. SetFocused + // bumps it whenever focus actually flips so the F6 list memo + // invalidates the per-line focus prefix. + version *list.Versioned focused bool } +// newFocusableMessageItem returns a focusableMessageItem wired to the +// shared version counter. +func newFocusableMessageItem(v *list.Versioned) *focusableMessageItem { + return &focusableMessageItem{version: v} +} + // SetFocused implements MessageItem. func (f *focusableMessageItem) SetFocused(focused bool) { + if f.focused == focused { + return + } f.focused = focused + if f.version != nil { + f.version.Bump() + } } // AssistantInfoID returns a stable ID for assistant info items. @@ -197,6 +266,7 @@ func AssistantInfoID(messageID string) string { // AssistantInfoItem renders model info and response time after assistant completes. type AssistantInfoItem struct { + *list.Versioned *cachedMessageItem id string @@ -209,6 +279,7 @@ type AssistantInfoItem struct { // NewAssistantInfoItem creates a new AssistantInfoItem. func NewAssistantInfoItem(sty *styles.Styles, message *message.Message, cfg *config.Config, lastUserMessageTime time.Time) MessageItem { return &AssistantInfoItem{ + Versioned: list.NewVersioned(), cachedMessageItem: &cachedMessageItem{}, id: AssistantInfoID(message.ID), message: message, @@ -218,6 +289,13 @@ func NewAssistantInfoItem(sty *styles.Styles, message *message.Message, cfg *con } } +// Finished implements list.Item. Assistant info blocks render a fixed +// model/duration footer once the assistant turn finishes; the data +// is immutable after construction so the entry is safe to freeze. +func (a *AssistantInfoItem) Finished() bool { + return true +} + // ID implements MessageItem. func (a *AssistantInfoItem) ID() string { return a.id @@ -237,12 +315,20 @@ func (a *AssistantInfoItem) RawRender(width int) string { // Render implements MessageItem. func (a *AssistantInfoItem) Render(width int) string { + // AssistantInfoItem uses a single, state-independent prefix; key 0 + // is sufficient. The cache is invalidated whenever the underlying + // cachedMessageItem render is cleared. + if cached, ok := a.getCachedPrefixedRender(width, 0); ok { + return cached + } prefix := a.sty.Messages.SectionHeader.Render() lines := strings.Split(a.RawRender(width), "\n") for i, line := range lines { lines[i] = prefix + line } - return strings.Join(lines, "\n") + out := strings.Join(lines, "\n") + a.setCachedPrefixedRender(out, width, 0) + return out } func (a *AssistantInfoItem) renderContent(width int) string { diff --git a/internal/ui/chat/prefix_cache_test.go b/internal/ui/chat/prefix_cache_test.go new file mode 100644 index 0000000000000000000000000000000000000000..443f5a68dab73d8e518b37d867fc9aa8c758ca25 --- /dev/null +++ b/internal/ui/chat/prefix_cache_test.go @@ -0,0 +1,196 @@ +package chat + +import ( + "strings" + "testing" + "time" + + "github.com/charmbracelet/crush/internal/message" + "github.com/charmbracelet/crush/internal/ui/attachments" + "github.com/charmbracelet/crush/internal/ui/styles" + "github.com/stretchr/testify/require" +) + +// finishedAssistantMessage builds an assistant message with text content and a +// finish part so AssistantMessageItem.isSpinning returns false and the +// prefix cache is exercised. +func finishedAssistantMessage(id, text string) *message.Message { + return &message.Message{ + ID: id, + Role: message.Assistant, + Parts: []message.ContentPart{ + message.TextContent{Text: text}, + message.Finish{Reason: message.FinishReasonEndTurn, Time: time.Now().Unix()}, + }, + } +} + +// TestAssistantMessageItemRender_PrefixCacheFocusBlur covers the F3 invariant +// that focus → blur → focus produces the correct prefix every time and never +// leaks the previous focus state out of the cache. +func TestAssistantMessageItemRender_PrefixCacheFocusBlur(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + msg := finishedAssistantMessage("m1", "Hello world from the cache test.") + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + + const width = 60 + + item.SetFocused(true) + focused1 := item.Render(width) + focused2 := item.Render(width) + require.Equal(t, focused1, focused2, "second render must hit the cache and match the first") + + item.SetFocused(false) + blurred1 := item.Render(width) + require.NotEqual(t, focused1, blurred1, "blur must produce a different prefixed render than focus") + + item.SetFocused(true) + focused3 := item.Render(width) + require.Equal(t, focused1, focused3, "re-focus must produce identical output to the original focused render") +} + +// TestAssistantMessageItemRender_PrefixCacheWidthInvalidates asserts that a +// width change does not return the previous width's cached output. +func TestAssistantMessageItemRender_PrefixCacheWidthInvalidates(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + msg := finishedAssistantMessage("m2", "Some content that wraps differently at different widths so the rendered output diverges.") + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + item.SetFocused(true) + + narrow := item.Render(40) + wide := item.Render(100) + require.NotEqual(t, narrow, wide, "different widths must produce different rendered output") + + narrowAgain := item.Render(40) + require.Equal(t, narrow, narrowAgain, "returning to the original width must hit (or repopulate) the cache with the same output") +} + +// TestAssistantMessageItemRender_PrefixCacheHighlightOnTop guarantees that +// activating a highlight range bypasses the prefix cache so selection drags +// reflect immediately, and that clearing the highlight returns to the cached +// prefixed output unchanged. +func TestAssistantMessageItemRender_PrefixCacheHighlightOnTop(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + msg := finishedAssistantMessage("m3", "Hello world from the highlight test.") + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + item.SetFocused(true) + + const width = 60 + plain := item.Render(width) + + // Activating a highlight must change the rendered output (selection + // painted on top) without poisoning the cache for the un-highlighted + // state that follows. + item.SetHighlight(0, 0, 0, 5) + highlighted := item.Render(width) + require.NotEqual(t, plain, highlighted, "active highlight must change Render output") + + // Clear the highlight; the cached un-highlighted prefix render must + // be returned unchanged. + item.SetHighlight(-1, -1, -1, -1) + plainAfter := item.Render(width) + require.Equal(t, plain, plainAfter, "clearing the highlight must restore the cached prefixed output exactly") +} + +// TestUserMessageItemRender_PrefixCacheFocusBlur is the user-message +// counterpart of the assistant focus/blur cache test. +func TestUserMessageItemRender_PrefixCacheFocusBlur(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + msg := &message.Message{ + ID: "u1", + Role: message.User, + Parts: []message.ContentPart{ + message.TextContent{Text: "Hello from the user."}, + }, + } + r := attachments.NewRenderer( + sty.Attachments.Normal, + sty.Attachments.Deleting, + sty.Attachments.Image, + sty.Attachments.Text, + ) + item := NewUserMessageItem(&sty, msg, r).(*UserMessageItem) + + const width = 60 + + item.SetFocused(true) + focused1 := item.Render(width) + focused2 := item.Render(width) + require.Equal(t, focused1, focused2) + + item.SetFocused(false) + blurred := item.Render(width) + require.NotEqual(t, focused1, blurred) + + item.SetFocused(true) + focused3 := item.Render(width) + require.Equal(t, focused1, focused3) +} + +// TestCachedMessageItem_PrefixCacheSemantics covers the constant-prefix +// path used by AssistantInfoItem and the (width, key) keying used by every +// item, against the underlying cachedMessageItem helper directly. This +// avoids constructing a full *config.Config with an initialized provider +// map just to exercise cache plumbing that is identical for all callers. +func TestCachedMessageItem_PrefixCacheSemantics(t *testing.T) { + t.Parallel() + + c := &cachedMessageItem{} + + // Empty cache: miss. + _, ok := c.getCachedPrefixedRender(80, 0) + require.False(t, ok) + + // Set then hit at the same (width, key). + c.setCachedPrefixedRender("hello", 80, 0) + got, ok := c.getCachedPrefixedRender(80, 0) + require.True(t, ok) + require.Equal(t, "hello", got) + + // Different width: miss. + _, ok = c.getCachedPrefixedRender(120, 0) + require.False(t, ok) + + // Different key (focused vs blurred): miss. + _, ok = c.getCachedPrefixedRender(80, 1) + require.False(t, ok) + + // clearCache drops the prefixed cache too. + c.setCachedRender("raw", 80, 1) + c.setCachedPrefixedRender("hello", 80, 0) + c.clearCache() + _, ok = c.getCachedPrefixedRender(80, 0) + require.False(t, ok, "clearCache must drop the prefixed render cache") + _, _, ok = c.getCachedRender(80) + require.False(t, ok, "clearCache must also drop the raw render cache") +} + +// TestAssistantMessageItemRender_PrefixCacheNoCacheLeak guards against a +// regression where the cache returned the prefixed output of the previous +// width. We verify that the cached output for width=W contains the W-sized +// prefix and not a stale wider one by checking that line lengths are +// consistent on cache hit. +func TestAssistantMessageItemRender_PrefixCacheNoCacheLeak(t *testing.T) { + t.Parallel() + + sty := styles.CharmtonePantera() + msg := finishedAssistantMessage("m4", strings.Repeat("word ", 40)) + item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem) + item.SetFocused(true) + + out80 := item.Render(80) + out120 := item.Render(120) + require.NotEqual(t, out80, out120) + + // Hit each cached entry again and confirm stability. + require.Equal(t, out80, item.Render(80)) + require.Equal(t, out120, item.Render(120)) +} diff --git a/internal/ui/chat/streaming_markdown.go b/internal/ui/chat/streaming_markdown.go new file mode 100644 index 0000000000000000000000000000000000000000..98535a8543cf6b3b99a3372297bcc17094c918e6 --- /dev/null +++ b/internal/ui/chat/streaming_markdown.go @@ -0,0 +1,740 @@ +package chat + +import ( + "strings" + + "charm.land/glamour/v2" + "github.com/charmbracelet/crush/internal/ui/common" +) + +// streamingMarkdown caches a "stable prefix" glamour render so each +// streaming flush only re-renders the trailing portion of the +// document. F8 of docs/notes/2026-05-12-chat-rendering-perf.md. +// +// The boundary between "stable" and "trailing" is detected by +// [findSafeMarkdownBoundary]: a position immediately after a blank +// line at which we can prove no markdown construct is open +// (fenced code block, list, table, block quote, setext header). +// +// Two renders concatenated are NOT generally equal to a single +// render of the whole document — glamour's wrap state is reset +// between calls. The boundary check is therefore deliberately +// conservative; whenever it has the slightest doubt the call +// falls back to a full render and the cache is left untouched. +// +// Invariants: +// +// - stablePrefix is always a literal byte prefix of the most +// recently rendered content. If a new content does not have +// stablePrefix as its prefix the cache is dropped. +// - stablePrefixRender is the glamour render of stablePrefix +// alone, with surrounding whitespace trimmed for clean +// concatenation. +// - width is the glamour wrap width that produced +// stablePrefixRender. A width change drops the cache. +type streamingMarkdown struct { + width int + stablePrefix string + stablePrefixRender string +} + +// Reset drops every cached field. After Reset the next Render call +// is guaranteed to be a full render. +func (s *streamingMarkdown) Reset() { + s.width = 0 + s.stablePrefix = "" + s.stablePrefixRender = "" +} + +// Render returns the glamour render of content at the given width, +// reusing the cached stable-prefix render when it is safe to do so. +// On any uncertainty the call falls back to a full render via +// renderer and leaves the cache untouched (or drops it). +// +// The returned string has its trailing newline trimmed to match +// the existing renderMarkdown contract on AssistantMessageItem. +// +// Concurrency: glamour's Render is stateful and not safe for +// concurrent invocation on a shared renderer. Crush's TUI is +// single-threaded so production never contends, but parallel +// callers (most notably the test suite) must serialize. We hold +// [common.LockMarkdownRenderer] for the entire prefix + +// trailing render sequence so other goroutines cannot interleave +// their own Render calls and corrupt goldmark's BlockStack. +func (s *streamingMarkdown) Render(content string, width int, renderer *glamour.TermRenderer) string { + mu := common.LockMarkdownRenderer(renderer) + mu.Lock() + defer mu.Unlock() + full := func() string { + out, err := renderer.Render(content) + if err != nil { + return content + } + return strings.TrimSuffix(out, "\n") + } + + // Width change OR content not a prefix-extension: drop cache, + // full render, optionally try to seed a fresh boundary on this + // call (step "f" in the design note). + if width != s.width || !strings.HasPrefix(content, s.stablePrefix) { + s.Reset() + s.width = width + out := full() + s.tryAdvanceFromEmpty(content, width, renderer) + return out + } + + boundary := findSafeMarkdownBoundary(content) + if boundary < 0 { + // No safe boundary anywhere yet. Full render; do not + // modify the cache (a future flush may find one). + return full() + } + + if boundary <= len(s.stablePrefix) { + // Cached prefix already covers an at-least-as-late + // boundary. Render the trailing partial fresh and glue. + trail := content[len(s.stablePrefix):] + return glueRenders(s.stablePrefixRender, s.renderTrailing(trail, renderer)) + } + + // boundary > len(stablePrefix): we have a NEW chunk of safe + // content. Render the new chunk, append to stablePrefixRender, + // promote the boundary, then render the remaining trail. + newChunk := content[len(s.stablePrefix):boundary] + newChunkRender := s.renderTrailing(newChunk, renderer) + s.stablePrefixRender = glueRenders(s.stablePrefixRender, newChunkRender) + s.stablePrefix = content[:boundary] + + trail := content[boundary:] + if trail == "" { + // boundary == len(content): no trailing content. Returning + // the cached prefix render directly is correct. + return s.stablePrefixRender + } + return glueRenders(s.stablePrefixRender, s.renderTrailing(trail, renderer)) +} + +// tryAdvanceFromEmpty seeds the cache from a fresh state. We've +// already paid the cost of a full render of `content`; if there is +// a safe boundary inside it, render the prefix once more (cheap +// relative to the full render we just did) and cache it so the +// next flush can avoid the full work. +// +// This is the optional optimisation step "f" from the design +// note. We render the prefix separately rather than try to +// recover it from the full render output because two renders +// concatenated ≠ a single render of the whole, and we prefer the +// cached prefix render to be byte-for-byte what we'd produce on a +// future cached call. +func (s *streamingMarkdown) tryAdvanceFromEmpty(content string, width int, renderer *glamour.TermRenderer) { + boundary := findSafeMarkdownBoundary(content) + if boundary <= 0 { + return + } + prefix := content[:boundary] + out, err := renderer.Render(prefix) + if err != nil { + return + } + s.stablePrefix = prefix + s.stablePrefixRender = trimGlamourMargins(out) + s.width = width +} + +// renderTrailing renders a trailing partial as a fresh glamour +// document and trims the surrounding whitespace so it can be +// concatenated to a cached prefix render without doubled blank +// lines. +func (s *streamingMarkdown) renderTrailing(text string, renderer *glamour.TermRenderer) string { + if text == "" { + return "" + } + out, err := renderer.Render(text) + if err != nil { + return text + } + return trimGlamourMargins(out) +} + +// glueRenders concatenates two glamour-rendered fragments with a +// single blank line separator. Glamour outputs typically carry +// their own surrounding margins; trimming on both sides and +// gluing with "\n\n" prevents the visible double-margin seam. +// +// Empty fragments are tolerated so the same helper works for the +// "boundary == len(content)" path where there is no trailing +// segment. +func glueRenders(prefix, trail string) string { + prefix = trimGlamourMargins(prefix) + trail = trimGlamourMargins(trail) + switch { + case prefix == "" && trail == "": + return "" + case prefix == "": + return trail + case trail == "": + return prefix + default: + return prefix + "\n\n" + trail + } +} + +// trimGlamourMargins strips leading and trailing whitespace +// (including newlines) from a glamour-rendered fragment. +// Glamour adds a leading blank line for documents that open with +// a heading or paragraph, plus a trailing newline; both must be +// removed before concatenation. +func trimGlamourMargins(s string) string { + return strings.Trim(s, " \t\n") +} + +// findSafeMarkdownBoundary returns the byte offset of the END of +// the latest safe boundary in content, i.e. the offset such that +// content[:boundary] is a valid stable-prefix candidate. The +// returned offset always points immediately after a blank-line +// separator, so concatenating a fresh render of content[boundary:] +// to a cached render of content[:boundary] does not require glamour +// to share state across the cut. +// +// Returns -1 when no safe boundary exists. SAFETY FIRST: any time +// we have the slightest doubt we return -1 and let the caller fall +// back to a full render. +// +// Decision tree, in order of preference (latest boundary wins): +// +// 1. Walk backward through every "blank line" position p such that +// content[:p] ends with "\n\n" (or "\n[ \t]*\n"). +// 2. For each candidate, check that content[:p] has an even +// number of triple-backtick fence lines (no open fenced +// block). Any odd count means we'd be cutting inside a fence +// and mis-syntax-highlighting the trailing partial. +// 2b. Reject if any line in content[:p] (outside fenced blocks) +// is a list-marker line, an HTML-block opener, or a link +// reference definition. See [prefixHasOpenHazard] for the +// reasoning behind these "anywhere in prefix" rejects. +// 3. Reject if the last non-blank line of content[:p] is: +// - a list item marker line ("^\s*([-*+]|\d+\.)\s") +// - a table line (contains "|") +// - a block quote ("^\s*>") +// - a setext header underline ("^=+\s*$" or "^-+\s*$") +// - an indented code line (4+ leading spaces or a tab) +// 4. Reject if the line immediately AFTER the boundary (skipping +// leading blank lines) looks like a setext underline (a line +// of '=' or '-' only). Rendering the prefix as a paragraph +// would change once the underline arrived; that's exactly the +// "splitting changes the prefix render" hazard §4.4 calls out. +// +// Returns the byte offset of the first character AFTER the blank +// line, i.e. the start of the trailing segment. +func findSafeMarkdownBoundary(content string) int { + if len(content) == 0 { + return -1 + } + + // Iterate every blank-line position from latest to earliest. + for p := blankLineBefore(content, len(content)); p > 0; p = blankLineBefore(content, p-1) { + if !isSafeBoundaryAt(content, p) { + continue + } + return p + } + return -1 +} + +// blankLineBefore returns the byte offset of the first character +// AFTER the latest blank-line separator that ends strictly before +// `until`. A blank-line separator is a sequence "\n([ \t]*\n)+" +// — one newline, then one or more lines containing only spaces or +// tabs and terminated by another newline. The returned offset is +// the start of the first non-blank line that follows the +// separator (or the position immediately after the final newline, +// if no further content remains). +// +// Returns -1 when no blank-line separator exists before `until`. +func blankLineBefore(content string, until int) int { + if until <= 0 { + return -1 + } + // Walk backward looking for a newline followed (after optional + // blank-line content) by another newline. We track the latest + // newline we've seen; if the next earlier newline has only + // blank chars between them, we have a blank-line separator + // and the boundary sits immediately after the latest newline. + end := until + for end > 0 { + nl := strings.LastIndexByte(content[:end], '\n') + if nl < 0 { + return -1 + } + // Look for an earlier newline whose gap to nl is empty + // or whitespace only. + prev := strings.LastIndexByte(content[:nl], '\n') + for prev >= 0 { + gap := content[prev+1 : nl] + if isBlankOrSpaces(gap) { + return nl + 1 + } + // Gap had non-whitespace; nl is not a blank-line + // separator. Move up: try with the earlier newline as + // the new "nl" candidate. + break + } + end = nl + } + return -1 +} + +// isBlankOrSpaces reports whether s consists entirely of spaces +// and tabs (or is empty). +func isBlankOrSpaces(s string) bool { + for i := range len(s) { + if s[i] != ' ' && s[i] != '\t' { + return false + } + } + return true +} + +// isSafeBoundaryAt reports whether content[:p] is a safe stable +// prefix. p must be a blank-line boundary (start of a line, with a +// blank line immediately preceding). +// +// Beyond the last-line checks, three "anywhere in the prefix" +// hazards force a reject because they cannot be reliably reasoned +// about by inspecting the trailing line alone. For each of these +// the simplest, safest rule was chosen — see prefixHasOpenHazard. +func isSafeBoundaryAt(content string, p int) bool { + prefix := content[:p] + + // (2) Even number of triple-backtick fence lines. + if countFenceLines(prefix)%2 != 0 { + return false + } + + // (2b) Anywhere-in-prefix hazards: open list (B1), HTML block + // opener (B2), reference link definition (B3). Any of these + // anywhere in the prefix forces a fallback. + if prefixHasOpenHazard(prefix) { + return false + } + + // (3) Inspect the last non-blank line of the prefix. + lastLine := lastNonBlankLine(prefix) + if lastLine != "" && lineOpensConstruct(lastLine) { + return false + } + + // (4) If anything follows, make sure it doesn't look like a + // setext underline that would retroactively turn the last + // paragraph of the prefix into a header. + if rest := content[p:]; rest != "" { + first := firstNonBlankLine(rest) + if isSetextUnderlineCandidate(first) { + return false + } + } + + return true +} + +// prefixHasOpenHazard reports whether prefix contains any of three +// constructs that cannot be safely cut at a blank-line boundary +// even when the immediately preceding line looks fine. Each check +// uses the SIMPLEST viable conservative rule per the F8 round-2 +// review: +// +// B1 (loose lists). A loose list has a blank line between an item +// and a continuation paragraph that begins with indentation +// but no list marker. If a candidate boundary lands on that +// blank line, the prefix's trailing non-blank line is the +// continuation paragraph, NOT a list marker, so the last-line +// check would accept it even though the list is still open. +// +// Rule chosen: any list-marker line ANYWHERE in the prefix +// forces -1. This is overly conservative — it forfeits +// boundary advancement past a closed list — but it eliminates +// the entire bug class with zero parsing of CommonMark's +// loose-list closure semantics. We retain the most useful +// boundary in practice: the one BEFORE the list opens (no +// marker has appeared in the prefix yet). +// +// B2 (HTML blocks). CommonMark defines seven HTML-block opener +// patterns (script/pre/style/textarea, comments, processing +// instructions, CDATA, declarations, recognised tag names). +// If the prefix opens an HTML block that the suffix closes, +// splitting renders the prefix as raw HTML and the suffix as +// prose. +// +// Rule chosen: any HTML-block opener anywhere in the prefix +// forces -1. Same trade-off as B1 — the typical assistant +// output contains no raw HTML, so the perf cost is zero in +// the common case. +// +// B3 (reference link definitions). A line of the form +// "[label]: " defines a link reference that the suffix +// may later use as "[text][label]". Splitting the document +// loses the definition because each half is rendered as an +// independent glamour document. +// +// Rule chosen: any reference link definition line anywhere in +// the prefix forces -1. Suffix-side reference detection is +// fragile (three syntaxes: [text][label], [label][], [label]), +// so the prefix-side check is the simpler safe choice. +// +// All three rules accept the perf hit of "no boundary after a +// list / HTML block / link def" in exchange for guaranteed +// soundness. If profiling shows this kills the F8 win on real +// streaming traces, the next iteration can promote each rule to +// its less-conservative variant (closure-aware list tracking, +// per-tag HTML close detection, suffix-aware ref tracking). +func prefixHasOpenHazard(prefix string) bool { + inFence := false + for line := range splitLines(prefix) { + // Track fenced state so list/html/ref patterns inside a + // fenced code block do not falsely trigger the hazards. + if isFenceLine(line) { + inFence = !inFence + continue + } + if inFence { + continue + } + trimmed := strings.TrimLeft(line, " \t") + if trimmed == "" { + continue + } + // B1: any list-item marker. + if isListItemMarker(trimmed) { + return true + } + // B2: HTML block opener. + if isHTMLBlockOpener(line) { + return true + } + // B3: link reference definition. + if isLinkRefDefinition(line) { + return true + } + } + return false +} + +// countFenceLines counts lines that begin a fenced code block in +// the CommonMark sense: a line whose first non-whitespace run is +// at least three consecutive backticks (or tildes). Each such +// line toggles the fenced state, so an even count means every +// opened fence has been closed. +// +// We accept up to three leading spaces of indentation (CommonMark +// rule) and require the fence characters to be the FIRST +// non-whitespace content of the line. We deliberately do NOT +// attempt to parse info-strings or differentiate opener from +// closer beyond toggling — a closing fence is just any line +// whose first non-whitespace run is >=3 of the same fence char. +func countFenceLines(s string) int { + n := 0 + for line := range splitLines(s) { + if isFenceLine(line) { + n++ + } + } + return n +} + +// isFenceLine reports whether line opens or closes a fenced code +// block. +func isFenceLine(line string) bool { + // Strip up to 3 spaces of indentation. + i := 0 + for i < len(line) && i < 3 && line[i] == ' ' { + i++ + } + if i >= len(line) { + return false + } + c := line[i] + if c != '`' && c != '~' { + return false + } + run := 0 + for i < len(line) && line[i] == c { + i++ + run++ + } + return run >= 3 +} + +// lastNonBlankLine returns the last non-blank line of s, or "" +// when every line is blank. +func lastNonBlankLine(s string) string { + last := "" + for line := range splitLines(s) { + if strings.TrimSpace(line) != "" { + last = line + } + } + return last +} + +// firstNonBlankLine returns the first non-blank line of s, or "" +// when every line is blank. +func firstNonBlankLine(s string) string { + for line := range splitLines(s) { + if strings.TrimSpace(line) != "" { + return line + } + } + return "" +} + +// splitLines yields the lines of s without their terminators. The +// final segment is yielded even if not newline-terminated. +func splitLines(s string) func(yield func(string) bool) { + return func(yield func(string) bool) { + start := 0 + for i := 0; i < len(s); i++ { + if s[i] == '\n' { + if !yield(s[start:i]) { + return + } + start = i + 1 + } + } + if start <= len(s)-1 { + yield(s[start:]) + } + } +} + +// lineOpensConstruct reports whether line keeps a markdown +// construct open across the boundary. We err conservatively — +// any case that smells like list/table/quote/setext/indented-code +// returns true. +func lineOpensConstruct(line string) bool { + // Indented code: a tab, or 4+ leading spaces. + if len(line) > 0 && line[0] == '\t' { + return true + } + if strings.HasPrefix(line, " ") { + return true + } + + trimmed := strings.TrimLeft(line, " \t") + if trimmed == "" { + return false + } + + // Block quote. + if trimmed[0] == '>' { + return true + } + + // List item: "- " "* " "+ " or ". " or ") ". + if isListItemMarker(trimmed) { + return true + } + + // Table: any pipe character anywhere in the line. Conservative: + // pipe-in-prose is rare and the cost of bailing is one slow + // frame. + if strings.ContainsRune(line, '|') { + return true + } + + // Setext underline candidate as the LAST line of the prefix: + // this would be a setext header for an even-earlier paragraph. + // Refuse to split at all in this case — the boundary is right + // in the middle of a header. + if isSetextUnderlineCandidate(trimmed) { + return true + } + + return false +} + +// isListItemMarker reports whether line (already left-trimmed) +// starts with a CommonMark list-item marker followed by a space +// or tab. +func isListItemMarker(line string) bool { + if line == "" { + return false + } + c := line[0] + if c == '-' || c == '*' || c == '+' { + if len(line) >= 2 && (line[1] == ' ' || line[1] == '\t') { + return true + } + return false + } + // Ordered list: digits followed by '.' or ')' and a space. + i := 0 + for i < len(line) && line[i] >= '0' && line[i] <= '9' { + i++ + } + if i == 0 || i > 9 { + return false + } + if i >= len(line) { + return false + } + if line[i] != '.' && line[i] != ')' { + return false + } + if i+1 >= len(line) { + return false + } + return line[i+1] == ' ' || line[i+1] == '\t' +} + +// isSetextUnderlineCandidate reports whether line (with optional +// leading whitespace) consists entirely of '=' or entirely of '-' +// characters with optional trailing whitespace. CommonMark +// requires no leading whitespace on the underline; we accept up +// to three spaces for safety so an indented underline still +// blocks a split. +func isSetextUnderlineCandidate(line string) bool { + // Strip leading whitespace. + i := 0 + for i < len(line) && (line[i] == ' ' || line[i] == '\t') { + i++ + } + if i == len(line) { + return false + } + c := line[i] + if c != '=' && c != '-' { + return false + } + j := i + for j < len(line) && line[j] == c { + j++ + } + // Allow trailing whitespace. + for j < len(line) { + if line[j] != ' ' && line[j] != '\t' { + return false + } + j++ + } + // Need at least one underline character. "-" alone is also a + // list marker without a trailing space; the listItem check + // covers the marker case before we get here. + return j-i >= 1 +} + +// isHTMLBlockOpener reports whether line begins one of the seven +// CommonMark HTML block patterns. We accept up to three spaces of +// leading indentation (CommonMark rule). Matching is intentionally +// loose — we only need to know the line "looks like an HTML +// block start", not parse the contained markup. +func isHTMLBlockOpener(line string) bool { + // Strip up to 3 spaces of indentation. + i := 0 + for i < len(line) && i < 3 && line[i] == ' ' { + i++ + } + rest := line[i:] + if len(rest) < 2 || rest[0] != '<' { + return false + } + + // Type 2: HTML comment "