diff --git a/internal/ui/chat/assistant.go b/internal/ui/chat/assistant.go index e9f00020294abe2d707b645a111edf6181d424f4..412d85238e53ba33b8b21f7917b7e2298a424d20 100644 --- a/internal/ui/chat/assistant.go +++ b/internal/ui/chat/assistant.go @@ -137,6 +137,13 @@ type AssistantMessageItem struct { thinkingSec assistantSection contentSec assistantSection errorSec assistantSection + + // streamingContent caches a "stable prefix" glamour render of + // the assistant content body so each streaming flush only + // re-renders the trailing partial. F8 of + // docs/notes/2026-05-12-chat-rendering-perf.md. See + // streaming_markdown.go for the full algorithm. + streamingContent streamingMarkdown } var _ Expandable = (*AssistantMessageItem)(nil) @@ -440,7 +447,10 @@ func (a *AssistantMessageItem) cachedError(width int) string { // lines so the visual box matches what the user sees today. func (a *AssistantMessageItem) renderThinking(thinking string, width int) string { renderer := common.QuietMarkdownRenderer(a.sty, width) + mu := common.LockMarkdownRenderer(renderer) + mu.Lock() rendered, err := renderer.Render(thinking) + mu.Unlock() if err != nil { rendered = thinking } @@ -489,14 +499,18 @@ func (a *AssistantMessageItem) renderThinking(thinking string, width int) string return result } -// renderMarkdown renders content as markdown. +// renderMarkdown renders content as markdown. F8 routes the call +// through streamingContent, which caches the glamour render of a +// "stable prefix" so each streaming flush only re-renders the +// trailing partial. The streaming cache invalidates itself on +// width change and on any content that is not a prefix-extension +// of the previously rendered content (e.g. user retried the +// turn), and falls back to a full render whenever boundary +// detection has the slightest doubt — see +// findSafeMarkdownBoundary. 
func (a *AssistantMessageItem) renderMarkdown(content string, width int) string { renderer := common.MarkdownRenderer(a.sty, width) - result, err := renderer.Render(content) - if err != nil { - return content - } - return strings.TrimSuffix(result, "\n") + return a.streamingContent.Render(content, width, renderer) } func (a *AssistantMessageItem) renderSpinning() string { @@ -564,11 +578,15 @@ func (a *AssistantMessageItem) Finished() bool { // clearCache drops every cached render for this item, including the // per-section caches. Shadows the embedded cachedMessageItem.clearCache // so ClearItemCaches (style change) wipes the section caches too. +// F8: also drop the streaming-markdown stable-prefix cache because +// the cached glamour render embeds the OLD style's ANSI sequences +// and is no longer visually consistent with the new style. func (a *AssistantMessageItem) clearCache() { a.cachedMessageItem.clearCache() a.thinkingSec.reset() a.contentSec.reset() a.errorSec.reset() + a.streamingContent.Reset() } // ToggleExpanded advances the F5 thinking view-mode cycle and returns diff --git a/internal/ui/chat/incremental_glamour_test.go b/internal/ui/chat/incremental_glamour_test.go new file mode 100644 index 0000000000000000000000000000000000000000..7f6e7ac7380bcfee53dd35edb84387f2cd74973b --- /dev/null +++ b/internal/ui/chat/incremental_glamour_test.go @@ -0,0 +1,853 @@ +package chat + +import ( + "strings" + "testing" + + "charm.land/glamour/v2" + "github.com/charmbracelet/crush/internal/ui/styles" + "github.com/stretchr/testify/require" +) + +// newTestRenderer builds a fresh glamour renderer for the given +// width. We deliberately do NOT share renderers between calls in +// the equivalence tests so any hidden state in +// [glamour.TermRenderer] cannot leak from a "cached" rendering +// path into a "fresh" rendering path. 
+func newTestRenderer(t *testing.T, width int) *glamour.TermRenderer {
+	t.Helper()
+	theme := styles.CharmtonePantera()
+	renderer, err := glamour.NewTermRenderer(
+		glamour.WithStyles(theme.Markdown),
+		glamour.WithWordWrap(width),
+	)
+	require.NoError(t, err)
+	return renderer
+}
+
+// freshRender is the reference path for equivalence checks: it
+// builds a brand-new renderer via newTestRenderer, renders content
+// as one document, fails the test on a render error, and strips a
+// single trailing newline (the same trim streamingMarkdown.Render
+// applies to its full-render output).
+func freshRender(t *testing.T, content string, width int) string {
+	t.Helper()
+	rendered, err := newTestRenderer(t, width).Render(content)
+	require.NoError(t, err)
+	return strings.TrimSuffix(rendered, "\n")
+}
+
+// stripANSI returns s with every CSI escape sequence removed, so
+// renders that differ only in colour state compare equal on their
+// visible glyphs. A sequence starts at ESC '[' and runs through the
+// first byte in the final-byte range 0x40..0x7e; an unterminated
+// sequence swallows the rest of the string. A lone ESC that is not
+// followed by '[' is copied through unchanged.
+func stripANSI(s string) string {
+	var visible strings.Builder
+	visible.Grow(len(s))
+	for pos := 0; pos < len(s); {
+		startsCSI := s[pos] == 0x1b && pos+1 < len(s) && s[pos+1] == '['
+		if !startsCSI {
+			visible.WriteByte(s[pos])
+			pos++
+			continue
+		}
+		// Skip parameter/intermediate bytes up to and including
+		// the final byte that terminates the sequence.
+		end := pos + 2
+		for end < len(s) {
+			final := s[end] >= 0x40 && s[end] <= 0x7e
+			end++
+			if final {
+				break
+			}
+		}
+		pos = end
+	}
+	return visible.String()
+}
+
+// normalizeRender canonicalises a rendered glamour string for
+// visual-equivalence comparison: strip ANSI, drop per-line
+// trailing whitespace, drop leading/trailing blank lines, and
+// collapse consecutive blank lines to a single blank line.
+//
+// Glamour pads rendered lines with trailing spaces and adds top/
+// bottom block margins that differ subtly between "render the
+// whole document at once" and "render two halves and concatenate
+// them." Per F8 design principle D, those byte-level differences
+// are acceptable as long as the visible content matches; this
+// helper makes that comparison explicit.
+func normalizeRender(s string) string {
+	rawLines := strings.Split(stripANSI(s), "\n")
+
+	// One pass: trim trailing blanks/tabs from each line and keep a
+	// blank line only when the previously kept line was not blank.
+	kept := make([]string, 0, len(rawLines))
+	lastWasBlank := false
+	for _, raw := range rawLines {
+		line := strings.TrimRight(raw, " \t")
+		isBlank := line == ""
+		if isBlank && lastWasBlank {
+			continue
+		}
+		kept = append(kept, line)
+		lastWasBlank = isBlank
+	}
+
+	// Shrink the window from both ends past blank lines.
+	first, last := 0, len(kept)
+	for first < last && kept[first] == "" {
+		first++
+	}
+	for last > first && kept[last-1] == "" {
+		last--
+	}
+	return strings.Join(kept[first:last], "\n")
+}
+
+// containsRawMarkdownSource reports whether the visible text of
+// rendered (ANSI stripped) still shows markdown source markers.
+// Used by T2 to assert that intermediate streaming flushes do not
+// leak raw source through to the user. Exactly two markers are
+// checked: a "```" fence delimiter anywhere in the text, and any
+// line that starts with "###" once leading spaces and tabs are
+// removed. Table pipes and dashes are not inspected.
+func containsRawMarkdownSource(rendered string) bool {
+	visible := stripANSI(rendered)
+	if strings.Contains(visible, "```") {
+		return true
+	}
+	for _, line := range strings.Split(visible, "\n") {
+		unindented := strings.TrimLeft(line, " \t")
+		if strings.HasPrefix(unindented, "###") {
+			return true
+		}
+	}
+	return false
+}
+
+// -----------------------------------------------------------------------
+// T1: findSafeMarkdownBoundary unit tests.
+// -----------------------------------------------------------------------
+
+// TestFindSafeMarkdownBoundary_TableDriven exercises the
+// findSafeMarkdownBoundary decision tree across the full set of
+// constructs §4.4 calls out: plain paragraphs, fenced code (open
+// and closed), lists, tables, block quotes, and setext headers.
+func TestFindSafeMarkdownBoundary_TableDriven(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name    string
+		content string
+		// want is the expected boundary; -1 means "no safe
+		// boundary." For a positive result the test additionally
+		// asserts the boundary is in range and sits immediately
+		// after a newline.
+		want int
+	}{
+		{
+			name:    "empty",
+			content: "",
+			want:    -1,
+		},
+		{
+			name:    "single line",
+			content: "Just a single paragraph",
+			want:    -1,
+		},
+		{
+			name:    "two paragraphs",
+			content: "First paragraph.\n\nSecond paragraph.",
+			// boundary at start of "Second"
+			want: len("First paragraph.\n\n"),
+		},
+		{
+			name:    "three paragraphs picks latest",
+			content: "First.\n\nSecond.\n\nThird.",
+			want:    len("First.\n\nSecond.\n\n"),
+		},
+		{
+			name:    "open fence at end",
+			content: "Para.\n\n```go\nfoo()\n",
+			// The only blank line sits between "Para." and the
+			// "```go" opener, so the sole candidate prefix is
+			// "Para.\n\n". No fence is open inside that prefix
+			// (fence count 0) and the opener that follows cannot
+			// change how the prefix renders, so the boundary is
+			// expected to be accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "inside open fence: no candidate after open",
+			content: "Para.\n\n```go\nfoo()\n\nbar()\n",
+			// blank line after "foo()" is INSIDE the fence
+			// (fence count at that prefix = 1, odd), must
+			// reject. The earlier blank line between "Para."
+			// and "```go" should still be safe (fence count
+			// at that prefix = 0).
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "closed fence followed by paragraph",
+			content: "Para1.\n\n```\nfoo()\n```\n\nPara2.",
+			// latest blank line is between "```" and "Para2.";
+			// fence count at that prefix = 2 (even), last
+			// non-blank line is "```" which is not a list/
+			// table/quote/setext.
+			want: len("Para1.\n\n```\nfoo()\n```\n\n"),
+		},
+		{
+			name:    "open list at end",
+			content: "Para.\n\n- one\n- two\n",
+			// The only blank line is between "Para." and
+			// "- one". The first non-blank line of the suffix
+			// is a list item, but a list opening does not
+			// change how the prefix renders, so the boundary
+			// BEFORE the list is accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "list interior: no boundary",
+			content: "- one\n- two\n",
+			// no blank line at all.
+			want: -1,
+		},
+		{
+			name:    "closed list then paragraph",
+			content: "- one\n- two\n\nPara.",
+			// blank line after the list. Last non-blank line
+			// of prefix is "- two" — a list item — so the
+			// candidate is REJECTED. (Conservative: we don't
+			// know the list is "closed" without looking at
+			// what follows.)
+			want: -1,
+		},
+		{
+			name:    "table at end",
+			content: "Para.\n\n| a | b |\n| --- | --- |\n| 1 | 2 |\n",
+			// blank-line candidate is between "Para." and
+			// table opener. Last non-blank line of prefix is
+			// "Para." — fine. Line AFTER is "| a | b |"
+			// which is a table line; doesn't retroactively
+			// change "Para." Boundary accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "table interior with internal blank line: no late boundary",
+			content: "| a | b |\n| --- | --- |\n\n| 1 | 2 |\n",
+			// the blank line in the middle is followed by
+			// another table line. Last non-blank line of
+			// prefix is "| --- | --- |" which contains a
+			// pipe — we reject.
+			want: -1,
+		},
+		{
+			name:    "block quote at end",
+			content: "Para.\n\n> quoted\n> still quoted\n",
+			// Last non-blank line of any prefix that ends
+			// inside the quote block is a "> ..." line —
+			// rejected. The blank line BEFORE the quote
+			// gives a prefix of "Para.\n\n" — last non-blank
+			// "Para." — accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "setext underline pending",
+			content: "Heading\n\n=====\n",
+			// blank line between "Heading" and "=====".
+			// Prefix = "Heading\n\n", last non-blank "Heading"
+			// — fine. But the FIRST non-blank line of the
+			// suffix is "=====", a setext-underline
+			// candidate. Splitting here would render the
+			// prefix as a paragraph "Heading", but the
+			// canonical render would treat the whole thing
+			// as a setext header. Reject.
+			//
+			// (Note: per CommonMark, a blank line between a
+			// paragraph and an underline actually breaks the
+			// setext, so the setext interpretation may not
+			// apply. But the boundary check is conservative
+			// — being wrong costs one slow frame, being
+			// over-aggressive costs visible breakage.)
+			want: -1,
+		},
+		{
+			name: "indented code at end of prefix",
+			// The code line must carry a full four-space indent:
+			// that is what makes it an indented code block rather
+			// than an ordinary paragraph.
+			content: "Para.\n\n    code line\n\nNext.",
+			// prefix candidates:
+			//   "Para.\n\n" — last non-blank "Para.", accepted
+			//   "Para.\n\n    code line\n\n" — last non-blank
+			//     is "    code line" which is indented 4
+			//     spaces — REJECTED.
+			// Latest accepted is the first.
+			want: len("Para.\n\n"),
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			t.Parallel()
+			got := findSafeMarkdownBoundary(c.content)
+			require.Equalf(t, c.want, got,
+				"findSafeMarkdownBoundary(%q) = %d, want %d", c.content, got, c.want)
+			if got > 0 {
+				// Boundary must point to the start of a line
+				// (i.e. just after a newline) when the prefix
+				// is non-empty. require.True aborts the subtest
+				// on failure, so the index below is in range.
+				require.True(t, got <= len(c.content),
+					"boundary %d out of range (len=%d)", got, len(c.content))
+				require.Equal(t, byte('\n'), c.content[got-1],
+					"boundary %d does not sit immediately after a newline", got)
+			}
+		})
+	}
+}
+
+// -----------------------------------------------------------------------
+// T2: streaming-equivalence tests.
+// -----------------------------------------------------------------------
+
+// streamingScenarios returns the four canonical document shapes
+// that exercise different boundary-detection paths: plain
+// paragraphs, paragraphs around a fenced code block, paragraphs
+// around a list, and paragraphs around a table.
+func streamingScenarios() []struct {
+	name string
+	doc  string
+} {
+	return []struct {
+		name string
+		doc  string
+	}{
+		{
+			name: "plain-paragraphs",
+			doc: strings.Join([]string{
+				"This is the first paragraph of the document.",
+				"",
+				"Here is the second paragraph; it has some words.",
+				"",
+				"And a third paragraph for good measure.",
+				"",
+				"Finally a fourth paragraph to push past one boundary.",
+			}, "\n"),
+		},
+		{
+			name: "paragraphs-with-fence",
+			doc: strings.Join([]string{
+				"Intro paragraph.",
+				"",
+				"Some explanatory prose before the code.",
+				"",
+				"```go",
+				"func hello() {",
+				"\tfmt.Println(\"hi\")",
+				"}",
+				"```",
+				"",
+				"And a closing paragraph after the code block.",
+			}, "\n"),
+		},
+		{
+			name: "paragraphs-with-list",
+			doc: strings.Join([]string{
+				"Intro paragraph.",
+				"",
+				"- list item one",
+				"- list item two",
+				"- list item three",
+				"",
+				"Trailing paragraph.",
+			}, "\n"),
+		},
+		{
+			name: "paragraphs-with-table",
+			doc: strings.Join([]string{
+				"Intro paragraph.",
+				"",
+				"| col a | col b |",
+				"| ----- | ----- |",
+				"| 1 | 2 |",
+				"| 3 | 4 |",
+				"",
+				"Trailing paragraph after the table.",
+			}, "\n"),
+		},
+	}
+}
+
+// progressivePrefixes splits doc into n monotonically growing
+// byte prefixes, ending with the full document. n>=1.
+func progressivePrefixes(doc string, n int) []string {
+	steps := n
+	if steps < 1 {
+		steps = 1
+	}
+	prefixes := make([]string, steps)
+	for idx := range prefixes {
+		// The last entry is pinned to the whole document; every
+		// earlier entry scales the byte length by step/steps
+		// using integer arithmetic.
+		cut := len(doc)
+		if step := idx + 1; step < steps {
+			cut = len(doc) * step / steps
+		}
+		prefixes[idx] = doc[:cut]
+	}
+	return prefixes
+}
+
+// TestStreamingMarkdown_FinalVisuallyEquivalent feeds each
+// scenario document through one streamingMarkdown as a series of
+// growing prefixes and checks that the output of the LAST flush,
+// after normalizeRender, equals a normalized fresh render of the
+// whole document. Visual equivalence (design principle D) is the
+// bar; strict byte-equality is not — see normalizeRender.
+func TestStreamingMarkdown_FinalVisuallyEquivalent(t *testing.T) {
+	t.Parallel()
+
+	const (
+		width = 80
+		steps = 15
+	)
+
+	for _, scenario := range streamingScenarios() {
+		t.Run(scenario.name, func(t *testing.T) {
+			t.Parallel()
+			renderer := newTestRenderer(t, width)
+			var cache streamingMarkdown
+
+			var final string
+			for _, prefix := range progressivePrefixes(scenario.doc, steps) {
+				final = cache.Render(prefix, width, renderer)
+			}
+
+			want := normalizeRender(freshRender(t, scenario.doc, width))
+			require.Equal(t, want, normalizeRender(final),
+				"final streaming output must match a fresh full render visually")
+		})
+	}
+}
+
+// TestStreamingMarkdown_IntermediateOutputsPlausible asserts that
+// every intermediate flush returns a non-empty string and does
+// not leak raw markdown source through to the user. This is the
+// "visually plausible" half of T2.
+func TestStreamingMarkdown_IntermediateOutputsPlausible(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	const steps = 12
+
+	for _, sc := range streamingScenarios() {
+		t.Run(sc.name, func(t *testing.T) {
+			t.Parallel()
+			renderer := newTestRenderer(t, width)
+			var sm streamingMarkdown
+
+			for i, p := range progressivePrefixes(sc.doc, steps) {
+				// Integer scaling can yield an empty first
+				// prefix; there is nothing to assert for it.
+				if p == "" {
+					continue
+				}
+				out := sm.Render(p, width, renderer)
+				require.NotEmptyf(t, out, "step %d: empty render for prefix len %d", i, len(p))
+				require.Falsef(t, containsRawMarkdownSource(out),
+					"step %d: render leaked raw markdown source.\nprefix=%q\nout=%s",
+					i, p, normalizeRender(out))
+			}
+		})
+	}
+}
+
+// -----------------------------------------------------------------------
+// T3: cache invalidation tests.
+// -----------------------------------------------------------------------
+
+// TestStreamingMarkdown_WidthChangeInvalidates asserts that a
+// width change re-keys the cache against the new width. The
+// observable contract checked here: after rendering the same
+// document at a different width, the cached width is updated, the
+// rendered output differs from the old width's output, and the
+// cached stable prefix is still a literal prefix of the content.
+func TestStreamingMarkdown_WidthChangeInvalidates(t *testing.T) {
+	t.Parallel()
+
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	r80 := newTestRenderer(t, 80)
+	r40 := newTestRenderer(t, 40)
+	var sm streamingMarkdown
+
+	out80 := sm.Render(doc, 80, r80)
+	require.Equal(t, 80, sm.width, "width must be cached after first render")
+
+	out40 := sm.Render(doc, 40, r40)
+	require.Equal(t, 40, sm.width, "width change must update cached width")
+	require.NotEqual(t, out80, out40,
+		"different widths must produce different rendered output")
+	// stablePrefix may legitimately have re-advanced after the
+	// reset (tryAdvanceFromEmpty). Whatever it holds now must be
+	// a literal prefix of the content that was just rendered;
+	// comparing lengths alone would not catch a stale prefix of
+	// the same size.
+	require.True(t, strings.HasPrefix(doc, sm.stablePrefix),
+		"stable prefix must be a prefix of the current content")
+}
+
+// TestStreamingMarkdown_NonPrefixContentInvalidates verifies
+// that content which is NOT a prefix-extension of the cached
+// stable prefix triggers a Reset and a fresh render path. This
+// guards the "user retried the turn" case.
+func TestStreamingMarkdown_NonPrefixContentInvalidates(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	r := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	// Drive a streaming sequence so the cache picks up a stable
+	// prefix.
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	for _, p := range progressivePrefixes(doc, 6) {
+		_ = sm.Render(p, width, r)
+	}
+	require.NotEmpty(t, sm.stablePrefix,
+		"stable prefix must be populated after streaming a multi-paragraph doc")
+
+	// Now switch to entirely different content (user retried).
+	other := "Completely different opening paragraph.\n\nAnd a second."
+	out := sm.Render(other, width, r)
+	require.NotEmpty(t, out)
+	// stablePrefix must be a prefix of `other`, i.e. cache was
+	// reset off the OLD content.
+	require.True(t, strings.HasPrefix(other, sm.stablePrefix),
+		"stable prefix must be reset to a prefix of the new content")
+
+	// Visual equivalence to a fresh render of `other`.
+	fresh := freshRender(t, other, width)
+	require.Equal(t, normalizeRender(fresh), normalizeRender(out),
+		"render after non-prefix content change must match a fresh render")
+}
+
+// TestStreamingMarkdown_ResetClearsCache asserts Reset() drops
+// every cached field, and that a render after Reset is visually
+// equivalent to a fresh full render.
+func TestStreamingMarkdown_ResetClearsCache(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	r := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	_ = sm.Render(doc, width, r)
+	// The sample doc has safe boundaries so the cache should
+	// have advanced. If for some reason it didn't, we still
+	// want Reset to be a no-op-safe operation; assert the
+	// post-Reset state directly.
+	sm.Reset()
+	require.Equal(t, 0, sm.width)
+	require.Equal(t, "", sm.stablePrefix)
+	require.Equal(t, "", sm.stablePrefixRender)
+
+	// Next render must be a full render path. Drive one step
+	// and verify the output matches a fresh full render.
+	out := sm.Render(doc, width, r)
+	fresh := freshRender(t, doc, width)
+	require.Equal(t, normalizeRender(fresh), normalizeRender(out))
+}
+
+// -----------------------------------------------------------------------
+// T4: fallback safety.
+// -----------------------------------------------------------------------
+
+// TestStreamingMarkdown_NoSafeBoundaryAlwaysFullRenders covers
+// the "one giant table being built character by character" case.
+// Every flush must fall back to a full render; the cache must
+// not advance into an unsafe state. We compare each flush to a
+// fresh full render of the same prefix; bytes must match for
+// each prefix individually.
+//
+// (Byte equality is sound here because no concatenation happens:
+// the streaming path delegates straight to renderer.Render when
+// the cache is empty and no safe boundary exists.)
+func TestStreamingMarkdown_NoSafeBoundaryAlwaysFullRenders(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+
+	// A single table that only ever grows: with no blank line in
+	// the document there is never a boundary candidate.
+	tableRows := []string{
+		"| col a | col b | col c |",
+		"| ----- | ----- | ----- |",
+		"| 1 | 2 | 3 |",
+		"| 4 | 5 | 6 |",
+		"| 7 | 8 | 9 |",
+		"| 10 | 11 | 12 |",
+		"| 13 | 14 | 15 |",
+		"| 16 | 17 | 18 |",
+		"| 19 | 20 | 21 |",
+		"| 22 | 23 | 24 |",
+	}
+	doc := strings.Join(tableRows, "\n")
+	require.Equal(t, -1, findSafeMarkdownBoundary(doc),
+		"sanity check: no blank lines, no safe boundary")
+
+	renderer := newTestRenderer(t, width)
+	var cache streamingMarkdown
+
+	for step, prefix := range progressivePrefixes(doc, 10) {
+		if prefix == "" {
+			continue
+		}
+		got := cache.Render(prefix, width, renderer)
+		want := freshRender(t, prefix, width)
+		require.Equalf(t, want, got,
+			"step %d (len=%d): streaming output must byte-equal a fresh render when boundary detection fails",
+			step, len(prefix))
+	}
+	// Nothing may have been cached along the way: no boundary was
+	// found, the width never changed, and no advance took place.
+	require.Equal(t, "", cache.stablePrefix,
+		"stable prefix must remain empty when no safe boundary ever exists")
+}
+
+// TestStreamingMarkdown_NoSafeBoundaryDoesNotCrash is the
+// minimum-viability half of T4: when boundary detection fails on
+// every flush, the streaming path must still return non-empty
+// output for non-empty input without crashing.
+func TestStreamingMarkdown_NoSafeBoundaryDoesNotCrash(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	renderer := newTestRenderer(t, width)
+	var cache streamingMarkdown
+
+	// One line that grows by a single byte per flush. It never
+	// contains a blank-line separator, so the cache never advances.
+	line := "The quick brown fox jumps over the lazy dog."
+	for end := 1; end <= len(line); end++ {
+		rendered := cache.Render(line[:end], width, renderer)
+		require.NotEmpty(t, rendered, "streaming output must not be empty for non-empty input")
+	}
+}
+
+// -----------------------------------------------------------------------
+// Integration assertions on the wired-in path.
+// -----------------------------------------------------------------------
+
+// -----------------------------------------------------------------------
+// T5 / T6 / T7: anywhere-in-prefix hazards (B1 / B2 / B3 from the
+// F8 round-2 review). For each hazard we drive every progressive
+// prefix of a document that exercises the hazard through the cache
+// and assert two contracts:
+//
+// 1. The cached stable prefix never contains the hazard. If the
+// hazard line is at byte offset H, then after every flush
+// len(sm.stablePrefix) <= H. This is the "no silent
+// corruption" half — the algorithm cannot accept a boundary
+// that splits across the hazard.
+//
+// 2. The final flush is visually equivalent to a fresh full
+// render of the complete document. This is the same T2-style
+// equivalence assertion ported to the new doc shapes.
+// -----------------------------------------------------------------------
+
+// nonBlankLines returns the non-blank visible lines of s with
+// per-line trailing whitespace trimmed. Used to compare two
+// rendered fragments for content equivalence when paragraph-
+// margin behaviour legitimately differs between a single fresh
+// render and a streaming split render (per F8 design principle D
+// — visual equivalence is the bar, byte-equivalence is not).
+//
+// Some glamour block types (notably HTML blocks and reference
+// link definitions) interact with adjacent paragraph blocks
+// during a single render — adjacency effectively suppresses the
+// blank-line margin between blocks.
+// When the streaming path
+// renders the prefix and trail in separate calls, the seam is
+// re-introduced as a blank line. The visible TEXT is identical;
+// only the inter-block margin differs.
+func nonBlankLines(s string) []string {
+	// Start from a non-nil empty slice so two results with no
+	// visible lines still compare equal under require.Equal.
+	visible := []string{}
+	for _, raw := range strings.Split(stripANSI(s), "\n") {
+		line := strings.TrimRight(raw, " \t")
+		if strings.TrimSpace(line) != "" {
+			visible = append(visible, line)
+		}
+	}
+	return visible
+}
+
+// runProgressiveBoundaryRespectTest is the shared body of
+// T5/T6/T7. It streams doc through one streamingMarkdown in
+// growing prefixes and, after every non-empty flush, requires a
+// non-empty render and a cached stable prefix no longer than
+// hazardLineOffset — the byte offset of the line whose presence in
+// the prefix must forfeit the boundary.
+//
+// The closing equivalence check compares non-blank lines only
+// rather than using full normalization; nonBlankLines documents
+// why.
+func runProgressiveBoundaryRespectTest(t *testing.T, doc string, hazardLineOffset int) {
+	t.Helper()
+	const (
+		width = 80
+		steps = 25
+	)
+
+	renderer := newTestRenderer(t, width)
+	var cache streamingMarkdown
+
+	var final string
+	for step, prefix := range progressivePrefixes(doc, steps) {
+		if prefix == "" {
+			continue
+		}
+		final = cache.Render(prefix, width, renderer)
+		require.NotEmptyf(t, final, "step %d: empty render", step)
+		require.LessOrEqualf(t, len(cache.stablePrefix), hazardLineOffset,
+			"step %d: cached stable prefix advanced past the hazard line\n"+
+				"prefix len=%d, hazard at %d, sm.stablePrefix=%q",
+			step, len(cache.stablePrefix), hazardLineOffset, cache.stablePrefix)
+	}
+
+	want := nonBlankLines(freshRender(t, doc, width))
+	require.Equal(t, want, nonBlankLines(final),
+		"final streaming output must contain the same non-blank lines as a fresh full render")
+}
+
+// TestStreamingMarkdown_LooseListContinuation locks in the B1 fix.
+// A loose list followed by a continuation paragraph and then a
+// trailing paragraph creates a candidate boundary between the list
+// item and its continuation; the trailing non-blank line of that
+// candidate prefix is the continuation paragraph (not a list
+// marker), so the line-only check would accept it. The
+// anywhere-in-prefix list-marker check rejects it.
+func TestStreamingMarkdown_LooseListContinuation(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"- item one",
+		"",
+		// The continuation must be indented to the list item's
+		// content column (two columns for a "- " marker) or it
+		// would start a new top-level paragraph instead.
+		// NOTE(review): indent width reconstructed as two spaces —
+		// confirm against the committed file.
+		"  continuation paragraph still belongs to item one",
+		"",
+		"- item two",
+		"",
+		"Trailing paragraph after the list.",
+	}, "\n")
+
+	// The first list marker line begins after "Intro paragraph.\n\n".
+	// The cached stable prefix may include that boundary (BEFORE
+	// the list opens) but must never advance into the list.
+	hazardOffset := strings.Index(doc, "- item one")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_HTMLBlock locks in the B2 fix. A raw HTML
+// block followed by a paragraph creates a candidate boundary
+// between the closed HTML block and the trailing paragraph. The
+// anywhere-in-prefix HTML-opener check rejects any boundary that
+// would include the HTML block in the stable prefix.
+func TestStreamingMarkdown_HTMLBlock(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"<div>",
+		"some block content",
+		"</div>",
+		"",
+		"Trailing paragraph after the HTML block.",
+	}, "\n")
+
+	// The hazard is the line that opens the HTML block. The
+	// search string must be non-empty: an empty needle would
+	// match at offset 0 and fail the setup check below.
+	hazardOffset := strings.Index(doc, "<div>")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_HTMLBlockType7 covers HTML block type 7
+// (CommonMark): a generic open/close tag whose name is NOT in the
+// fixed type-6 set still opens an HTML block and must forfeit any
+// boundary that would split the block off from following content.
+func TestStreamingMarkdown_HTMLBlockType7(t *testing.T) {
+	t.Parallel()
+
+	// NOTE(review): the tag name is reconstructed as "custom-tag"
+	// from the trailing paragraph's wording; any name outside the
+	// type-6 set exercises the same path — confirm against the
+	// committed file.
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"<custom-tag>",
+		"some block content",
+		"</custom-tag>",
+		"",
+		"Trailing paragraph after the custom-tag block.",
+	}, "\n")
+
+	hazardOffset := strings.Index(doc, "<custom-tag>")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_LinkRefDefinition locks in the B3 fix. A
+// reference link definition followed by a paragraph that uses the
+// reference creates a boundary candidate between the def and the
+// paragraph; rendering them in separate glamour passes loses the
+// definition. The anywhere-in-prefix ref-def check rejects.
+func TestStreamingMarkdown_LinkRefDefinition(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"[ref]: http://example.com",
+		"",
+		"Trailing paragraph that links to [the example][ref] inline.",
+	}, "\n")
+
+	hazardOffset := strings.Index(doc, "[ref]:")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestAssistantStreamingContent_ResetOnClearCache guards the
+// integration contract that ClearItemCaches (style change) drops
+// the streaming-markdown cache. Without this, a style change
+// would leave the OLD style's ANSI sequences embedded in the
+// stable-prefix render and the next flush would visually mix
+// styles.
+func TestAssistantStreamingContent_ResetOnClearCache(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	msg := finishedAssistantMessage("stream-clear", doc)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 80
+	_ = item.RawRender(width)
+	// Drive a second message that extends the content so the
+	// streaming cache has a chance to advance (if it would).
+	doc2 := doc + "\n\nFour."
+	item.SetMessage(finishedAssistantMessage("stream-clear", doc2))
+	_ = item.RawRender(width)
+
+	// Now wipe the caches the way ClearItemCaches does.
+	item.clearCache()
+
+	// NOTE(review): nothing above asserts that the cache was
+	// populated before clearCache ran, so these checks would also
+	// pass if RawRender never reached the streaming cache —
+	// consider asserting a non-empty stablePrefix first.
+	require.Equal(t, "", item.streamingContent.stablePrefix,
+		"clearCache must Reset the streaming-markdown cache")
+	require.Equal(t, "", item.streamingContent.stablePrefixRender)
+	require.Equal(t, 0, item.streamingContent.width)
+}
diff --git a/internal/ui/chat/streaming_markdown.go b/internal/ui/chat/streaming_markdown.go
new file mode 100644
index 0000000000000000000000000000000000000000..98535a8543cf6b3b99a3372297bcc17094c918e6
--- /dev/null
+++ b/internal/ui/chat/streaming_markdown.go
@@ -0,0 +1,740 @@
+package chat
+
+import (
+	"strings"
+
+	"charm.land/glamour/v2"
+	"github.com/charmbracelet/crush/internal/ui/common"
+)
+
+// streamingMarkdown caches a "stable prefix" glamour render so each
+// streaming flush only re-renders the trailing portion of the
+// document. F8 of docs/notes/2026-05-12-chat-rendering-perf.md.
+//
+// The boundary between "stable" and "trailing" is detected by
+// [findSafeMarkdownBoundary]: a position immediately after a blank
+// line at which we can prove no markdown construct is open
+// (fenced code block, list, table, block quote, setext header).
+//
+// Two renders concatenated are NOT generally equal to a single
+// render of the whole document — glamour's wrap state is reset
+// between calls.
// streamingMarkdown caches a "stable prefix" glamour render so each
// streaming flush only re-renders the trailing portion of the
// document. F8 of docs/notes/2026-05-12-chat-rendering-perf.md.
//
// The boundary between "stable" and "trailing" is detected by
// [findSafeMarkdownBoundary]: a position immediately after a blank
// line at which no markdown construct is considered open.
//
// Two renders concatenated are NOT generally equal to a single
// render of the whole document, so the boundary check is
// deliberately conservative; whenever it is in doubt the caller
// falls back to a full render.
//
// Invariants:
//
//   - stablePrefix is always a literal byte prefix of the most
//     recently rendered content. If new content does not have
//     stablePrefix as its prefix the cache is dropped.
//   - stablePrefixRender is the render of stablePrefix with
//     surrounding whitespace trimmed for clean concatenation.
//   - width is the wrap width that produced stablePrefixRender.
//     A width change drops the cache.
type streamingMarkdown struct {
	width              int
	stablePrefix       string
	stablePrefixRender string
}

// Reset drops every cached field. After Reset the next Render call
// is guaranteed to be a full render.
//
// The whole struct is overwritten with its zero value rather than
// clearing fields one by one, so a field added later cannot be
// forgotten here and leak stale state across a reset.
func (s *streamingMarkdown) Reset() {
	*s = streamingMarkdown{}
}
+func (s *streamingMarkdown) Render(content string, width int, renderer *glamour.TermRenderer) string { + mu := common.LockMarkdownRenderer(renderer) + mu.Lock() + defer mu.Unlock() + full := func() string { + out, err := renderer.Render(content) + if err != nil { + return content + } + return strings.TrimSuffix(out, "\n") + } + + // Width change OR content not a prefix-extension: drop cache, + // full render, optionally try to seed a fresh boundary on this + // call (step "f" in the design note). + if width != s.width || !strings.HasPrefix(content, s.stablePrefix) { + s.Reset() + s.width = width + out := full() + s.tryAdvanceFromEmpty(content, width, renderer) + return out + } + + boundary := findSafeMarkdownBoundary(content) + if boundary < 0 { + // No safe boundary anywhere yet. Full render; do not + // modify the cache (a future flush may find one). + return full() + } + + if boundary <= len(s.stablePrefix) { + // Cached prefix already covers an at-least-as-late + // boundary. Render the trailing partial fresh and glue. + trail := content[len(s.stablePrefix):] + return glueRenders(s.stablePrefixRender, s.renderTrailing(trail, renderer)) + } + + // boundary > len(stablePrefix): we have a NEW chunk of safe + // content. Render the new chunk, append to stablePrefixRender, + // promote the boundary, then render the remaining trail. + newChunk := content[len(s.stablePrefix):boundary] + newChunkRender := s.renderTrailing(newChunk, renderer) + s.stablePrefixRender = glueRenders(s.stablePrefixRender, newChunkRender) + s.stablePrefix = content[:boundary] + + trail := content[boundary:] + if trail == "" { + // boundary == len(content): no trailing content. Returning + // the cached prefix render directly is correct. + return s.stablePrefixRender + } + return glueRenders(s.stablePrefixRender, s.renderTrailing(trail, renderer)) +} + +// tryAdvanceFromEmpty seeds the cache from a fresh state. 
We've +// already paid the cost of a full render of `content`; if there is +// a safe boundary inside it, render the prefix once more (cheap +// relative to the full render we just did) and cache it so the +// next flush can avoid the full work. +// +// This is the optional optimisation step "f" from the design +// note. We render the prefix separately rather than try to +// recover it from the full render output because two renders +// concatenated ≠ a single render of the whole, and we prefer the +// cached prefix render to be byte-for-byte what we'd produce on a +// future cached call. +func (s *streamingMarkdown) tryAdvanceFromEmpty(content string, width int, renderer *glamour.TermRenderer) { + boundary := findSafeMarkdownBoundary(content) + if boundary <= 0 { + return + } + prefix := content[:boundary] + out, err := renderer.Render(prefix) + if err != nil { + return + } + s.stablePrefix = prefix + s.stablePrefixRender = trimGlamourMargins(out) + s.width = width +} + +// renderTrailing renders a trailing partial as a fresh glamour +// document and trims the surrounding whitespace so it can be +// concatenated to a cached prefix render without doubled blank +// lines. +func (s *streamingMarkdown) renderTrailing(text string, renderer *glamour.TermRenderer) string { + if text == "" { + return "" + } + out, err := renderer.Render(text) + if err != nil { + return text + } + return trimGlamourMargins(out) +} + +// glueRenders concatenates two glamour-rendered fragments with a +// single blank line separator. Glamour outputs typically carry +// their own surrounding margins; trimming on both sides and +// gluing with "\n\n" prevents the visible double-margin seam. +// +// Empty fragments are tolerated so the same helper works for the +// "boundary == len(content)" path where there is no trailing +// segment. 
// glueRenders joins two rendered fragments with exactly one blank
// line between them. Both fragments are margin-trimmed first, so
// any surrounding whitespace they carry cannot produce a doubled
// gap at the seam.
//
// An empty fragment (after trimming) contributes nothing: the other
// fragment is returned alone, and two empty fragments give "".
func glueRenders(prefix, trail string) string {
	head, tail := trimGlamourMargins(prefix), trimGlamourMargins(trail)
	if head == "" {
		return tail
	}
	if tail == "" {
		return head
	}
	return head + "\n\n" + tail
}

// trimGlamourMargins removes every leading and trailing space, tab
// and newline from a rendered fragment. Interior whitespace is
// left alone.
func trimGlamourMargins(s string) string {
	const margin = " \t\n"
	return strings.TrimRight(strings.TrimLeft(s, margin), margin)
}
Reject if the last non-blank line of content[:p] is: +// - a list item marker line ("^\s*([-*+]|\d+\.)\s") +// - a table line (contains "|") +// - a block quote ("^\s*>") +// - a setext header underline ("^=+\s*$" or "^-+\s*$") +// - an indented code line (4+ leading spaces or a tab) +// 4. Reject if the line immediately AFTER the boundary (skipping +// leading blank lines) looks like a setext underline (a line +// of '=' or '-' only). Rendering the prefix as a paragraph +// would change once the underline arrived; that's exactly the +// "splitting changes the prefix render" hazard §4.4 calls out. +// +// Returns the byte offset of the first character AFTER the blank +// line, i.e. the start of the trailing segment. +func findSafeMarkdownBoundary(content string) int { + if len(content) == 0 { + return -1 + } + + // Iterate every blank-line position from latest to earliest. + for p := blankLineBefore(content, len(content)); p > 0; p = blankLineBefore(content, p-1) { + if !isSafeBoundaryAt(content, p) { + continue + } + return p + } + return -1 +} + +// blankLineBefore returns the byte offset of the first character +// AFTER the latest blank-line separator that ends strictly before +// `until`. A blank-line separator is a sequence "\n([ \t]*\n)+" +// — one newline, then one or more lines containing only spaces or +// tabs and terminated by another newline. The returned offset is +// the start of the first non-blank line that follows the +// separator (or the position immediately after the final newline, +// if no further content remains). +// +// Returns -1 when no blank-line separator exists before `until`. +func blankLineBefore(content string, until int) int { + if until <= 0 { + return -1 + } + // Walk backward looking for a newline followed (after optional + // blank-line content) by another newline. 
We track the latest + // newline we've seen; if the next earlier newline has only + // blank chars between them, we have a blank-line separator + // and the boundary sits immediately after the latest newline. + end := until + for end > 0 { + nl := strings.LastIndexByte(content[:end], '\n') + if nl < 0 { + return -1 + } + // Look for an earlier newline whose gap to nl is empty + // or whitespace only. + prev := strings.LastIndexByte(content[:nl], '\n') + for prev >= 0 { + gap := content[prev+1 : nl] + if isBlankOrSpaces(gap) { + return nl + 1 + } + // Gap had non-whitespace; nl is not a blank-line + // separator. Move up: try with the earlier newline as + // the new "nl" candidate. + break + } + end = nl + } + return -1 +} + +// isBlankOrSpaces reports whether s consists entirely of spaces +// and tabs (or is empty). +func isBlankOrSpaces(s string) bool { + for i := range len(s) { + if s[i] != ' ' && s[i] != '\t' { + return false + } + } + return true +} + +// isSafeBoundaryAt reports whether content[:p] is a safe stable +// prefix. p must be a blank-line boundary (start of a line, with a +// blank line immediately preceding). +// +// Beyond the last-line checks, three "anywhere in the prefix" +// hazards force a reject because they cannot be reliably reasoned +// about by inspecting the trailing line alone. For each of these +// the simplest, safest rule was chosen — see prefixHasOpenHazard. +func isSafeBoundaryAt(content string, p int) bool { + prefix := content[:p] + + // (2) Even number of triple-backtick fence lines. + if countFenceLines(prefix)%2 != 0 { + return false + } + + // (2b) Anywhere-in-prefix hazards: open list (B1), HTML block + // opener (B2), reference link definition (B3). Any of these + // anywhere in the prefix forces a fallback. + if prefixHasOpenHazard(prefix) { + return false + } + + // (3) Inspect the last non-blank line of the prefix. 
+ lastLine := lastNonBlankLine(prefix) + if lastLine != "" && lineOpensConstruct(lastLine) { + return false + } + + // (4) If anything follows, make sure it doesn't look like a + // setext underline that would retroactively turn the last + // paragraph of the prefix into a header. + if rest := content[p:]; rest != "" { + first := firstNonBlankLine(rest) + if isSetextUnderlineCandidate(first) { + return false + } + } + + return true +} + +// prefixHasOpenHazard reports whether prefix contains any of three +// constructs that cannot be safely cut at a blank-line boundary +// even when the immediately preceding line looks fine. Each check +// uses the SIMPLEST viable conservative rule per the F8 round-2 +// review: +// +// B1 (loose lists). A loose list has a blank line between an item +// and a continuation paragraph that begins with indentation +// but no list marker. If a candidate boundary lands on that +// blank line, the prefix's trailing non-blank line is the +// continuation paragraph, NOT a list marker, so the last-line +// check would accept it even though the list is still open. +// +// Rule chosen: any list-marker line ANYWHERE in the prefix +// forces -1. This is overly conservative — it forfeits +// boundary advancement past a closed list — but it eliminates +// the entire bug class with zero parsing of CommonMark's +// loose-list closure semantics. We retain the most useful +// boundary in practice: the one BEFORE the list opens (no +// marker has appeared in the prefix yet). +// +// B2 (HTML blocks). CommonMark defines seven HTML-block opener +// patterns (script/pre/style/textarea, comments, processing +// instructions, CDATA, declarations, recognised tag names). +// If the prefix opens an HTML block that the suffix closes, +// splitting renders the prefix as raw HTML and the suffix as +// prose. +// +// Rule chosen: any HTML-block opener anywhere in the prefix +// forces -1. 
Same trade-off as B1 — the typical assistant +// output contains no raw HTML, so the perf cost is zero in +// the common case. +// +// B3 (reference link definitions). A line of the form +// "[label]: " defines a link reference that the suffix +// may later use as "[text][label]". Splitting the document +// loses the definition because each half is rendered as an +// independent glamour document. +// +// Rule chosen: any reference link definition line anywhere in +// the prefix forces -1. Suffix-side reference detection is +// fragile (three syntaxes: [text][label], [label][], [label]), +// so the prefix-side check is the simpler safe choice. +// +// All three rules accept the perf hit of "no boundary after a +// list / HTML block / link def" in exchange for guaranteed +// soundness. If profiling shows this kills the F8 win on real +// streaming traces, the next iteration can promote each rule to +// its less-conservative variant (closure-aware list tracking, +// per-tag HTML close detection, suffix-aware ref tracking). +func prefixHasOpenHazard(prefix string) bool { + inFence := false + for line := range splitLines(prefix) { + // Track fenced state so list/html/ref patterns inside a + // fenced code block do not falsely trigger the hazards. + if isFenceLine(line) { + inFence = !inFence + continue + } + if inFence { + continue + } + trimmed := strings.TrimLeft(line, " \t") + if trimmed == "" { + continue + } + // B1: any list-item marker. + if isListItemMarker(trimmed) { + return true + } + // B2: HTML block opener. + if isHTMLBlockOpener(line) { + return true + } + // B3: link reference definition. + if isLinkRefDefinition(line) { + return true + } + } + return false +} + +// countFenceLines counts lines that begin a fenced code block in +// the CommonMark sense: a line whose first non-whitespace run is +// at least three consecutive backticks (or tildes). Each such +// line toggles the fenced state, so an even count means every +// opened fence has been closed. 
+// +// We accept up to three leading spaces of indentation (CommonMark +// rule) and require the fence characters to be the FIRST +// non-whitespace content of the line. We deliberately do NOT +// attempt to parse info-strings or differentiate opener from +// closer beyond toggling — a closing fence is just any line +// whose first non-whitespace run is >=3 of the same fence char. +func countFenceLines(s string) int { + n := 0 + for line := range splitLines(s) { + if isFenceLine(line) { + n++ + } + } + return n +} + +// isFenceLine reports whether line opens or closes a fenced code +// block. +func isFenceLine(line string) bool { + // Strip up to 3 spaces of indentation. + i := 0 + for i < len(line) && i < 3 && line[i] == ' ' { + i++ + } + if i >= len(line) { + return false + } + c := line[i] + if c != '`' && c != '~' { + return false + } + run := 0 + for i < len(line) && line[i] == c { + i++ + run++ + } + return run >= 3 +} + +// lastNonBlankLine returns the last non-blank line of s, or "" +// when every line is blank. +func lastNonBlankLine(s string) string { + last := "" + for line := range splitLines(s) { + if strings.TrimSpace(line) != "" { + last = line + } + } + return last +} + +// firstNonBlankLine returns the first non-blank line of s, or "" +// when every line is blank. +func firstNonBlankLine(s string) string { + for line := range splitLines(s) { + if strings.TrimSpace(line) != "" { + return line + } + } + return "" +} + +// splitLines yields the lines of s without their terminators. The +// final segment is yielded even if not newline-terminated. +func splitLines(s string) func(yield func(string) bool) { + return func(yield func(string) bool) { + start := 0 + for i := 0; i < len(s); i++ { + if s[i] == '\n' { + if !yield(s[start:i]) { + return + } + start = i + 1 + } + } + if start <= len(s)-1 { + yield(s[start:]) + } + } +} + +// lineOpensConstruct reports whether line keeps a markdown +// construct open across the boundary. 
// lineOpensConstruct reports whether line, taken as the last
// non-blank line of a prefix, may keep a markdown construct open
// across the boundary. It errs on the side of returning true for
// anything that looks like indented code, a block quote, a list
// item, a table row, or a setext underline.
func lineOpensConstruct(line string) bool {
	// Indented code: a leading tab, or 4+ leading spaces.
	if strings.HasPrefix(line, "\t") || strings.HasPrefix(line, "    ") {
		return true
	}

	trimmed := strings.TrimLeft(line, " \t")
	if trimmed == "" {
		return false
	}

	switch {
	case trimmed[0] == '>':
		// Block quote.
		return true
	case isListItemMarker(trimmed):
		// List item: "- ", "* ", "+ ", "N. " or "N) ".
		return true
	case strings.Contains(line, "|"):
		// Table: any pipe anywhere in the line. Pipe-in-prose is
		// rare and the cost of bailing is one slow frame.
		return true
	case isSetextUnderlineCandidate(trimmed):
		// The prefix ends on what may be a setext underline for
		// an earlier paragraph; do not cut right after a header.
		return true
	}
	return false
}

// isListItemMarker reports whether line (already left-trimmed)
// starts with a CommonMark list-item marker: a bullet ('-', '*',
// '+') or one to nine digits followed by '.' or ')'. The marker
// must be followed by a space, a tab, or the end of the line; the
// last case is an empty list item whose content may arrive on the
// following lines.
func isListItemMarker(line string) bool {
	if line == "" {
		return false
	}

	// Length of the marker itself, without the separator.
	n := 0
	switch c := line[0]; {
	case c == '-' || c == '*' || c == '+':
		n = 1
	default:
		digits := 0
		for digits < len(line) && line[digits] >= '0' && line[digits] <= '9' {
			digits++
		}
		if digits == 0 || digits > 9 || digits >= len(line) {
			return false
		}
		if line[digits] != '.' && line[digits] != ')' {
			return false
		}
		n = digits + 1
	}

	if n == len(line) {
		return true // bare marker at end of line: empty list item
	}
	return line[n] == ' ' || line[n] == '\t'
}

// isSetextUnderlineCandidate reports whether line, ignoring any
// leading and trailing spaces or tabs, is a non-empty run made up
// entirely of '=' or entirely of '-'. Leading whitespace of any
// width is accepted, which is looser than CommonMark, so that an
// indented underline still blocks a split.
func isSetextUnderlineCandidate(line string) bool {
	run := strings.Trim(line, " \t")
	if run == "" {
		return false
	}
	c := run[0]
	if c != '=' && c != '-' {
		return false
	}
	// Every byte must be the same underline character; interior
	// whitespace or any other byte disqualifies the line.
	return strings.Trim(run, string(c)) == ""
}