@@ -0,0 +1,853 @@
+package chat
+
+import (
+ "strings"
+ "testing"
+
+ "charm.land/glamour/v2"
+ "github.com/charmbracelet/crush/internal/ui/styles"
+ "github.com/stretchr/testify/require"
+)
+
+// newTestRenderer returns a brand-new glamour renderer that uses
+// the CharmtonePantera markdown styles and word-wraps at width.
+// Each call constructs its own renderer on purpose: the
+// equivalence tests must not let any hidden state inside a
+// [glamour.TermRenderer] travel from a "cached" rendering path
+// into a "fresh" one. A construction error fails the test.
+func newTestRenderer(t *testing.T, width int) *glamour.TermRenderer {
+	t.Helper()
+	markdown := styles.CharmtonePantera().Markdown
+	renderer, err := glamour.NewTermRenderer(
+		glamour.WithStyles(markdown),
+		glamour.WithWordWrap(width),
+	)
+	require.NoError(t, err)
+	return renderer
+}
+
+// freshRender renders content in one pass with a renderer built
+// just for this call, then strips a single trailing newline (the
+// same trim the streaming path is documented to apply). It is
+// the reference output for byte- and visible-equivalence
+// comparisons against streamingMarkdown.Render. A render error
+// fails the test.
+func freshRender(t *testing.T, content string, width int) string {
+	t.Helper()
+	rendered, err := newTestRenderer(t, width).Render(content)
+	require.NoError(t, err)
+	return strings.TrimSuffix(rendered, "\n")
+}
+
+// stripANSI returns s with every ANSI CSI escape sequence
+// (ESC '[' ... final byte) removed, so two renders that differ
+// only in colour state can be compared on their visible glyphs.
+//
+// A sequence ends at the first byte in the range 0x40-0x7e that
+// follows the "ESC [" introducer; an unterminated sequence
+// swallows the rest of the string. A lone ESC that is not
+// followed by '[' is copied through unchanged.
+func stripANSI(s string) string {
+	var out strings.Builder
+	out.Grow(len(s))
+	for pos := 0; pos < len(s); {
+		startsCSI := s[pos] == 0x1b && pos+1 < len(s) && s[pos+1] == '['
+		if !startsCSI {
+			out.WriteByte(s[pos])
+			pos++
+			continue
+		}
+		// Skip the two-byte introducer, then everything up to
+		// and including the final byte.
+		pos += 2
+		for pos < len(s) {
+			final := s[pos] >= 0x40 && s[pos] <= 0x7e
+			pos++
+			if final {
+				break
+			}
+		}
+	}
+	return out.String()
+}
+
+// normalizeRender canonicalises a rendered glamour string for
+// visual-equivalence comparison: strip ANSI, drop per-line
+// trailing whitespace, drop leading/trailing blank lines, and
+// collapse consecutive blank lines to a single blank line.
+//
+// Glamour pads rendered lines with trailing spaces and adds top/
+// bottom block margins that differ subtly between "render the
+// whole document at once" and "render two halves and concatenate
+// them." Per F8 design principle D, those byte-level differences
+// are acceptable as long as the visible content matches; this
+// helper makes that comparison explicit.
+func normalizeRender(s string) string {
+ clean := stripANSI(s)
+ lines := strings.Split(clean, "\n")
+ for i, l := range lines {
+ lines[i] = strings.TrimRight(l, " \t")
+ }
+ // Collapse consecutive blank lines.
+ out := make([]string, 0, len(lines))
+ prevBlank := false
+ for _, l := range lines {
+ blank := l == ""
+ if blank && prevBlank {
+ continue
+ }
+ out = append(out, l)
+ prevBlank = blank
+ }
+ // Trim leading and trailing blanks.
+ for len(out) > 0 && out[0] == "" {
+ out = out[1:]
+ }
+ for len(out) > 0 && out[len(out)-1] == "" {
+ out = out[:len(out)-1]
+ }
+ return strings.Join(out, "\n")
+}
+
+// containsRawMarkdownSource reports whether the visible portion of
+// rendered contains literal markdown source markers that should
+// have been consumed by glamour. Used by T2 to assert that
+// intermediate streaming flushes don't leak raw source through to
+// the user. We deliberately only flag markers that glamour
+// removes during rendering ("```" fence delimiters, "|" table
+// pipes embedded in a line that also contains pipes β actual
+// table syntax β and bare "###" headers); pipes-in-prose and
+// dashes are too common to flag.
+func containsRawMarkdownSource(rendered string) bool {
+ clean := stripANSI(rendered)
+ if strings.Contains(clean, "```") {
+ return true
+ }
+ for _, line := range strings.Split(clean, "\n") {
+ if strings.HasPrefix(strings.TrimLeft(line, " \t"), "###") {
+ return true
+ }
+ }
+ return false
+}
+
+// -----------------------------------------------------------------------
+// T1: findSafeMarkdownBoundary unit tests.
+// -----------------------------------------------------------------------
+
+// TestFindSafeMarkdownBoundary_TableDriven exercises the
+// findSafeMarkdownBoundary decision tree across the full set of
+// constructs §4.4 calls out: plain paragraphs, fenced code (open
+// and closed), lists, tables, block quotes, and setext headers,
+// plus an indented-code case.
+func TestFindSafeMarkdownBoundary_TableDriven(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name    string
+		content string
+		// want is the expected boundary; -1 means "no safe
+		// boundary." When the returned boundary is positive the
+		// test additionally asserts that it lies within content
+		// and sits immediately after a newline.
+		want int
+	}{
+		{
+			name:    "empty",
+			content: "",
+			want:    -1,
+		},
+		{
+			name:    "single line",
+			content: "Just a single paragraph",
+			want:    -1,
+		},
+		{
+			name:    "two paragraphs",
+			content: "First paragraph.\n\nSecond paragraph.",
+			// boundary at start of "Second"
+			want: len("First paragraph.\n\n"),
+		},
+		{
+			name:    "three paragraphs picks latest",
+			content: "First.\n\nSecond.\n\nThird.",
+			want:    len("First.\n\nSecond.\n\n"),
+		},
+		{
+			name:    "open fence at end",
+			content: "Para.\n\n```go\nfoo()\n",
+			// No closing fence. The only blank line sits between
+			// "Para." and "```go", so the only candidate prefix
+			// is "Para.\n\n". That prefix contains no fence at
+			// all (fence count 0, even), and the fence that
+			// opens AFTER it cannot change how "Para." renders,
+			// so the boundary is expected to be accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "inside open fence: no candidate after open",
+			content: "Para.\n\n```go\nfoo()\n\nbar()\n",
+			// blank line after "foo()" is INSIDE the fence
+			// (fence count at that prefix = 1, odd), must
+			// reject. The earlier blank line between "Para."
+			// and "```go" should still be safe (fence count
+			// at that prefix = 0).
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "closed fence followed by paragraph",
+			content: "Para1.\n\n```\nfoo()\n```\n\nPara2.",
+			// latest blank line is between "```" and "Para2.";
+			// fence count at that prefix = 2 (even), last
+			// non-blank line is "```" which is not a list/
+			// table/quote/setext.
+			want: len("Para1.\n\n```\nfoo()\n```\n\n"),
+		},
+		{
+			name:    "open list at end",
+			content: "Para.\n\n- one\n- two\n",
+			// The blank line between "Para." and "- one" is
+			// the only candidate. The first non-blank line of
+			// the suffix is "- one" — that's fine, a list
+			// opening doesn't change the prefix's rendering.
+			// So the boundary BEFORE the list is accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "list interior: no boundary",
+			content: "- one\n- two\n",
+			// no blank line at all.
+			want: -1,
+		},
+		{
+			name:    "closed list then paragraph",
+			content: "- one\n- two\n\nPara.",
+			// blank line after the list. Last non-blank line
+			// of prefix is "- two" — a list item — so the
+			// candidate is REJECTED. (Conservative: we don't
+			// know the list is "closed" without looking at
+			// what follows.)
+			want: -1,
+		},
+		{
+			name:    "table at end",
+			content: "Para.\n\n| a | b |\n| --- | --- |\n| 1 | 2 |\n",
+			// blank-line candidate is between "Para." and
+			// table opener. Last non-blank line of prefix is
+			// "Para." — fine. Line AFTER is "| a | b |"
+			// which is a table line; doesn't retroactively
+			// change "Para." Boundary accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "table interior with internal blank line: no late boundary",
+			content: "| a | b |\n| --- | --- |\n\n| 1 | 2 |\n",
+			// the blank line in the middle is followed by
+			// another table line. Last non-blank line of
+			// prefix is "| --- | --- |" which contains a
+			// pipe — we reject.
+			want: -1,
+		},
+		{
+			name:    "block quote at end",
+			content: "Para.\n\n> quoted\n> still quoted\n",
+			// Last non-blank line of any prefix that ends
+			// inside the quote block is a "> ..." line —
+			// rejected. The blank line BEFORE the quote
+			// gives a prefix of "Para.\n\n" — last non-blank
+			// "Para." — accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "setext underline pending",
+			content: "Heading\n\n=====\n",
+			// blank line between "Heading" and "=====".
+			// Prefix = "Heading\n\n", last non-blank "Heading"
+			// — fine. But the FIRST non-blank line of the
+			// suffix is "=====", a setext-underline
+			// candidate. Splitting here would render the
+			// prefix as a paragraph "Heading", but the
+			// canonical render would treat the whole thing
+			// as a setext header. Reject.
+			//
+			// (Note: per CommonMark, a blank line between a
+			// paragraph and an underline actually breaks the
+			// setext, so the setext interpretation may not
+			// apply. But the boundary check is conservative
+			// — being wrong costs one slow frame, being
+			// over-aggressive costs visible breakage.)
+			want: -1,
+		},
+		{
+			name: "indented code at end of prefix",
+			// The code line carries FOUR leading spaces: that is
+			// what makes it an indented code block, and it is
+			// the property this case exists to exercise.
+			content: "Para.\n\n    code line\n\nNext.",
+			// prefix candidates:
+			//   "Para.\n\n" — last non-blank "Para.", accepted
+			//   "Para.\n\n    code line\n\n" — last non-blank
+			//     is "    code line" which is indented 4
+			//     spaces — REJECTED.
+			// Latest accepted is the first.
+			want: len("Para.\n\n"),
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			t.Parallel()
+			got := findSafeMarkdownBoundary(c.content)
+			require.Equalf(t, c.want, got,
+				"findSafeMarkdownBoundary(%q) = %d, want %d", c.content, got, c.want)
+			if got <= 0 {
+				return
+			}
+			// A positive boundary must be in range and point to
+			// the start of a line (i.e. just after a newline).
+			// require aborts the subtest on failure, so the
+			// index below is only evaluated when it is in range.
+			require.LessOrEqualf(t, got, len(c.content),
+				"boundary %d out of range (len=%d)", got, len(c.content))
+			require.Equalf(t, byte('\n'), c.content[got-1],
+				"boundary %d does not sit immediately after a newline", got)
+		})
+	}
+}
+
+// -----------------------------------------------------------------------
+// T2: streaming-equivalence tests.
+// -----------------------------------------------------------------------
+
+// streamingScenarios returns the four canonical document shapes
+// shared by the streaming tests: paragraphs only, paragraphs
+// around a fenced code block, paragraphs around a list, and
+// paragraphs around a table. Each shape exercises a different
+// boundary-detection path.
+func streamingScenarios() []struct {
+	name string
+	doc  string
+} {
+	// lines joins its arguments with "\n"; there is no trailing
+	// newline after the last line.
+	lines := func(l ...string) string { return strings.Join(l, "\n") }
+
+	plain := lines(
+		"This is the first paragraph of the document.",
+		"",
+		"Here is the second paragraph; it has some words.",
+		"",
+		"And a third paragraph for good measure.",
+		"",
+		"Finally a fourth paragraph to push past one boundary.",
+	)
+	fence := lines(
+		"Intro paragraph.",
+		"",
+		"Some explanatory prose before the code.",
+		"",
+		"```go",
+		"func hello() {",
+		"\tfmt.Println(\"hi\")",
+		"}",
+		"```",
+		"",
+		"And a closing paragraph after the code block.",
+	)
+	list := lines(
+		"Intro paragraph.",
+		"",
+		"- list item one",
+		"- list item two",
+		"- list item three",
+		"",
+		"Trailing paragraph.",
+	)
+	table := lines(
+		"Intro paragraph.",
+		"",
+		"| col a | col b |",
+		"| ----- | ----- |",
+		"| 1 | 2 |",
+		"| 3 | 4 |",
+		"",
+		"Trailing paragraph after the table.",
+	)
+
+	return []struct {
+		name string
+		doc  string
+	}{
+		{name: "plain-paragraphs", doc: plain},
+		{name: "paragraphs-with-fence", doc: fence},
+		{name: "paragraphs-with-list", doc: list},
+		{name: "paragraphs-with-table", doc: table},
+	}
+}
+
+// progressivePrefixes returns n byte prefixes of doc of
+// non-decreasing length, the last of which is doc itself. Values
+// of n below 1 are treated as 1.
+//
+// Prefix k (1-based) has length len(doc)*k/n using integer
+// division, so early prefixes may be empty when doc is shorter
+// than n, and cuts are made at byte offsets without regard to
+// rune boundaries.
+func progressivePrefixes(doc string, n int) []string {
+	steps := n
+	if steps < 1 {
+		steps = 1
+	}
+	total := len(doc)
+	prefixes := make([]string, 0, steps)
+	for step := 1; step < steps; step++ {
+		prefixes = append(prefixes, doc[:total*step/steps])
+	}
+	// The final entry is always the complete document.
+	return append(prefixes, doc)
+}
+
+// TestStreamingMarkdown_FinalVisuallyEquivalent feeds each
+// scenario to a streamingMarkdown as a series of growing
+// prefixes and checks that the LAST output is visually
+// equivalent (per design principle D) to a fresh render of the
+// whole document. The comparison goes through normalizeRender,
+// so strict byte-equality is not the bar — see the comment on
+// normalizeRender for why.
+func TestStreamingMarkdown_FinalVisuallyEquivalent(t *testing.T) {
+	t.Parallel()
+
+	const (
+		width = 80
+		steps = 15
+	)
+
+	for _, sc := range streamingScenarios() {
+		t.Run(sc.name, func(t *testing.T) {
+			t.Parallel()
+			var (
+				sm    streamingMarkdown
+				final string
+			)
+			renderer := newTestRenderer(t, width)
+			for _, prefix := range progressivePrefixes(sc.doc, steps) {
+				final = sm.Render(prefix, width, renderer)
+			}
+
+			want := normalizeRender(freshRender(t, sc.doc, width))
+			require.Equal(t, want, normalizeRender(final),
+				"final streaming output must match a fresh full render visually")
+		})
+	}
+}
+
+// TestStreamingMarkdown_IntermediateOutputsPlausible is the
+// "visually plausible" half of T2. For every non-empty
+// progressive prefix of every scenario, the streaming render
+// must be non-empty and must not trip
+// containsRawMarkdownSource, i.e. must not leak raw markdown
+// source through to the user.
+func TestStreamingMarkdown_IntermediateOutputsPlausible(t *testing.T) {
+	t.Parallel()
+
+	const (
+		width = 80
+		steps = 12
+	)
+
+	for _, sc := range streamingScenarios() {
+		t.Run(sc.name, func(t *testing.T) {
+			t.Parallel()
+			var sm streamingMarkdown
+			renderer := newTestRenderer(t, width)
+
+			for step, prefix := range progressivePrefixes(sc.doc, steps) {
+				// Integer scaling can yield an empty first
+				// prefix; there is nothing to render for it.
+				if len(prefix) == 0 {
+					continue
+				}
+				rendered := sm.Render(prefix, width, renderer)
+				require.NotEmptyf(t, rendered, "step %d: empty render for prefix len %d", step, len(prefix))
+				leaked := containsRawMarkdownSource(rendered)
+				require.Falsef(t, leaked,
+					"step %d: render leaked raw markdown source.\nprefix=%q\nout=%s",
+					step, prefix, normalizeRender(rendered))
+			}
+		})
+	}
+}
+
+// -----------------------------------------------------------------------
+// T3: cache invalidation tests.
+// -----------------------------------------------------------------------
+
+// TestStreamingMarkdown_WidthChangeInvalidates asserts that a
+// width change re-keys the cache against the new width. The test
+// lives in the same package, so it checks both the observable
+// contract and the cache fields:
+//
+//   - sm.width tracks the width of the most recent render;
+//   - the same document rendered at 80 and at 40 columns yields
+//     different output;
+//   - after the second render, sm.stablePrefix is a literal
+//     prefix of the current content.
+func TestStreamingMarkdown_WidthChangeInvalidates(t *testing.T) {
+	t.Parallel()
+
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	r80 := newTestRenderer(t, 80)
+	r40 := newTestRenderer(t, 40)
+	var sm streamingMarkdown
+
+	out80 := sm.Render(doc, 80, r80)
+	require.Equal(t, 80, sm.width, "width must be cached after first render")
+
+	out40 := sm.Render(doc, 40, r40)
+	require.Equal(t, 40, sm.width, "width change must update cached width")
+	require.NotEqual(t, out80, out40,
+		"different widths must produce different rendered output")
+	// stablePrefix may legitimately have re-advanced after the
+	// reset (tryAdvanceFromEmpty), so it is not required to be
+	// empty here. What must hold is the documented invariant
+	// that it is a literal byte prefix of the content just
+	// rendered; a length-only comparison would also pass for an
+	// unrelated string that merely happens to be short enough.
+	require.True(t, strings.HasPrefix(doc, sm.stablePrefix),
+		"stable prefix must be a prefix of the current content")
+}
+
+// TestStreamingMarkdown_NonPrefixContentInvalidates guards the
+// "user retried the turn" case. Once the cache holds a stable
+// prefix, rendering content that does NOT extend that prefix
+// must leave the cache keyed to the new content and must produce
+// output visually equivalent to a fresh render of it.
+func TestStreamingMarkdown_NonPrefixContentInvalidates(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	var sm streamingMarkdown
+	renderer := newTestRenderer(t, width)
+
+	// Stream a multi-paragraph document so the cache picks up a
+	// stable prefix.
+	first := "Para one.\n\nPara two.\n\nPara three."
+	for _, prefix := range progressivePrefixes(first, 6) {
+		_ = sm.Render(prefix, width, renderer)
+	}
+	require.NotEmpty(t, sm.stablePrefix,
+		"stable prefix must be populated after streaming a multi-paragraph doc")
+
+	// Switch to entirely different content (user retried).
+	retry := "Completely different opening paragraph.\n\nAnd a second."
+	got := sm.Render(retry, width, renderer)
+	require.NotEmpty(t, got)
+
+	// Whatever is cached now must be a prefix of the retry
+	// content, i.e. the cache was reset off the OLD content.
+	stillValid := strings.HasPrefix(retry, sm.stablePrefix)
+	require.True(t, stillValid,
+		"stable prefix must be reset to a prefix of the new content")
+
+	// Visual equivalence to a fresh render of the retry content.
+	want := normalizeRender(freshRender(t, retry, width))
+	require.Equal(t, want, normalizeRender(got),
+		"render after non-prefix content change must match a fresh render")
+}
+
+// TestStreamingMarkdown_ResetClearsCache checks that Reset()
+// zeroes every cached field and that the render following a
+// Reset is visually equivalent to a fresh full render.
+func TestStreamingMarkdown_ResetClearsCache(t *testing.T) {
+	t.Parallel()
+
+	const (
+		width = 80
+		doc   = "Para one.\n\nPara two.\n\nPara three."
+	)
+	renderer := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	_ = sm.Render(doc, width, renderer)
+
+	// The sample doc has safe boundaries, so the cache should
+	// have advanced by now. The test does not depend on that:
+	// Reset must be safe either way, so only the post-Reset
+	// state is asserted.
+	sm.Reset()
+	require.Equal(t, 0, sm.width)
+	require.Equal(t, "", sm.stablePrefix)
+	require.Equal(t, "", sm.stablePrefixRender)
+
+	// With the cache empty the next render has to take the full
+	// render path; its output must match a fresh full render.
+	after := sm.Render(doc, width, renderer)
+	want := normalizeRender(freshRender(t, doc, width))
+	require.Equal(t, want, normalizeRender(after))
+}
+
+// -----------------------------------------------------------------------
+// T4: fallback safety.
+// -----------------------------------------------------------------------
+
+// TestStreamingMarkdown_NoSafeBoundaryAlwaysFullRenders covers
+// the "one giant table being built character by character" case.
+// The document has no blank line, so no boundary candidate
+// exists. Every flush must therefore fall back to a full render,
+// and the cache must never advance. Each flush is compared
+// byte-for-byte with a fresh full render of the same prefix.
+//
+// (Byte equality is sound here because no concatenation happens:
+// the streaming path delegates straight to renderer.Render when
+// the cache is empty and no safe boundary exists.)
+func TestStreamingMarkdown_NoSafeBoundaryAlwaysFullRenders(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+
+	// One growing table — no blank lines anywhere, so no
+	// boundary candidate is ever found.
+	rows := []string{
+		"| col a | col b | col c |",
+		"| ----- | ----- | ----- |",
+		"| 1 | 2 | 3 |",
+		"| 4 | 5 | 6 |",
+		"| 7 | 8 | 9 |",
+		"| 10 | 11 | 12 |",
+		"| 13 | 14 | 15 |",
+		"| 16 | 17 | 18 |",
+		"| 19 | 20 | 21 |",
+		"| 22 | 23 | 24 |",
+	}
+	doc := strings.Join(rows, "\n")
+	require.Equal(t, -1, findSafeMarkdownBoundary(doc),
+		"sanity check: no blank lines, no safe boundary")
+
+	var sm streamingMarkdown
+	renderer := newTestRenderer(t, width)
+
+	for step, prefix := range progressivePrefixes(doc, 10) {
+		if len(prefix) == 0 {
+			continue
+		}
+		streamed := sm.Render(prefix, width, renderer)
+		want := freshRender(t, prefix, width)
+		require.Equalf(t, want, streamed,
+			"step %d (len=%d): streaming output must byte-equal a fresh render when boundary detection fails",
+			step, len(prefix))
+	}
+
+	// No boundary was ever found and the width never changed, so
+	// nothing should have been cached.
+	require.Equal(t, "", sm.stablePrefix,
+		"stable prefix must remain empty when no safe boundary ever exists")
+}
+
+// TestStreamingMarkdown_NoSafeBoundaryDoesNotCrash is the
+// minimum-viability assertion of T4. Even when boundary
+// detection fails on every flush, the streaming path must not
+// crash and must return non-empty output for non-empty input.
+func TestStreamingMarkdown_NoSafeBoundaryDoesNotCrash(t *testing.T) {
+	t.Parallel()
+
+	const (
+		width = 80
+		// A pathological input: a single line that is fed one
+		// more byte at a time. It never contains a blank-line
+		// separator, so the cache is never advanced.
+		src = "The quick brown fox jumps over the lazy dog."
+	)
+	var sm streamingMarkdown
+	renderer := newTestRenderer(t, width)
+
+	for end := 1; end <= len(src); end++ {
+		rendered := sm.Render(src[:end], width, renderer)
+		require.NotEmpty(t, rendered, "streaming output must not be empty for non-empty input")
+	}
+}
+
+// -----------------------------------------------------------------------
+// Integration assertions on the wired-in path.
+// -----------------------------------------------------------------------
+
+// -----------------------------------------------------------------------
+// T5 / T6 / T7: anywhere-in-prefix hazards (B1 / B2 / B3 from the
+// F8 round-2 review). For each hazard we drive every progressive
+// prefix of a document that exercises the hazard through the cache
+// and assert two contracts:
+//
+// 1. The cached stable prefix never contains the hazard. If the
+// hazard line is at byte offset H, then after every flush
+// len(sm.stablePrefix) <= H. This is the "no silent
+// corruption" half — the algorithm cannot accept a boundary
+// that splits across the hazard.
+//
+// 2. The final flush is visually equivalent to a fresh full
+// render of the complete document. This is the same T2-style
+// equivalence assertion ported to the new doc shapes.
+// -----------------------------------------------------------------------
+
+// nonBlankLines returns the visible lines of s that contain
+// something other than whitespace, with ANSI sequences stripped
+// and trailing spaces/tabs trimmed from each. The result is
+// never nil. It is used to compare two rendered fragments for
+// content equivalence when paragraph-margin behaviour
+// legitimately differs between a single fresh render and a
+// streaming split render (per F8 design principle D — visual
+// equivalence is the bar, byte-equivalence is not).
+//
+// Some glamour block types (notably HTML blocks and reference
+// link definitions) interact with adjacent paragraph blocks
+// during a single render — adjacency effectively suppresses the
+// blank-line margin between blocks. When the streaming path
+// renders the prefix and trail in separate calls, the seam is
+// re-introduced as a blank line. The visible TEXT is identical;
+// only the inter-block margin differs.
+func nonBlankLines(s string) []string {
+	kept := []string{}
+	for _, raw := range strings.Split(stripANSI(s), "\n") {
+		line := strings.TrimRight(raw, " \t")
+		if strings.TrimSpace(line) != "" {
+			kept = append(kept, line)
+		}
+	}
+	return kept
+}
+
+// runProgressiveBoundaryRespectTest is the shared body of
+// T5/T6/T7. It streams doc through a streamingMarkdown in 25
+// progressive prefixes at width 80 and enforces two contracts:
+//
+//   - after every non-empty flush the render is non-empty and
+//     len(sm.stablePrefix) <= hazardLineOffset, i.e. the cached
+//     stable prefix never extends past the byte offset of the
+//     line whose PRESENCE in the prefix must trigger the hazard
+//     reject;
+//   - the final flush contains the same non-blank lines as a
+//     fresh full render of doc.
+//
+// The final check is content-based (non-blank lines compared)
+// rather than full-normalization: see nonBlankLines for the
+// reason.
+func runProgressiveBoundaryRespectTest(t *testing.T, doc string, hazardLineOffset int) {
+	t.Helper()
+
+	const (
+		width = 80
+		steps = 25
+	)
+	var (
+		sm    streamingMarkdown
+		final string
+	)
+	renderer := newTestRenderer(t, width)
+
+	for step, prefix := range progressivePrefixes(doc, steps) {
+		if len(prefix) == 0 {
+			continue
+		}
+		final = sm.Render(prefix, width, renderer)
+		require.NotEmptyf(t, final, "step %d: empty render", step)
+		cached := len(sm.stablePrefix)
+		require.LessOrEqualf(t, cached, hazardLineOffset,
+			"step %d: cached stable prefix advanced past the hazard line\nprefix len=%d, hazard at %d, sm.stablePrefix=%q",
+			step, cached, hazardLineOffset, sm.stablePrefix)
+	}
+
+	want := nonBlankLines(freshRender(t, doc, width))
+	require.Equal(t, want, nonBlankLines(final),
+		"final streaming output must contain the same non-blank lines as a fresh full render")
+}
+
+// TestStreamingMarkdown_LooseListContinuation locks in the B1 fix.
+// A loose list followed by a continuation paragraph and then a
+// trailing paragraph creates a candidate boundary between the list
+// item and its continuation; the trailing non-blank line of that
+// candidate prefix is the continuation paragraph (not a list
+// marker), so the line-only check would accept it. The
+// anywhere-in-prefix list-marker check rejects it.
+func TestStreamingMarkdown_LooseListContinuation(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"- item one",
+		"",
+		// Two leading spaces: per CommonMark a paragraph only
+		// continues a "- " list item when it is indented to the
+		// item's content column. With less indentation it would
+		// be an ordinary paragraph that ends the list, and this
+		// test would not exercise a continuation at all.
+		"  continuation paragraph still belongs to item one",
+		"",
+		"- item two",
+		"",
+		"Trailing paragraph after the list.",
+	}, "\n")
+
+	// The first list marker line begins after "Intro paragraph.\n\n".
+	// The cached stable prefix may include that boundary (BEFORE
+	// the list opens) but must never advance into the list.
+	hazardOffset := strings.Index(doc, "- item one")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_HTMLBlock locks in the B2 fix. A raw HTML
+// block followed by a paragraph creates a candidate boundary
+// between the closed HTML block and the trailing paragraph. The
+// anywhere-in-prefix HTML-opener check rejects any boundary that
+// would include the HTML block in the stable prefix.
+func TestStreamingMarkdown_HTMLBlock(t *testing.T) {
+ t.Parallel()
+
+ doc := strings.Join([]string{
+ "Intro paragraph.",
+ "",
+ "<div>",
+ "some block content",
+ "</div>",
+ "",
+ "Trailing paragraph after the HTML block.",
+ }, "\n")
+
+ hazardOffset := strings.Index(doc, "<div>")
+ require.Greater(t, hazardOffset, 0, "test setup")
+
+ runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_HTMLBlockType7 covers HTML block type 7
+// (CommonMark): a generic open/close tag whose name is NOT in the
+// fixed type-6 set still opens an HTML block and must forfeit any
+// boundary that would split the block off from following content.
+func TestStreamingMarkdown_HTMLBlockType7(t *testing.T) {
+ t.Parallel()
+
+ doc := strings.Join([]string{
+ "Intro paragraph.",
+ "",
+ "<custom-tag>",
+ "some block content",
+ "</custom-tag>",
+ "",
+ "Trailing paragraph after the custom-tag block.",
+ }, "\n")
+
+ hazardOffset := strings.Index(doc, "<custom-tag>")
+ require.Greater(t, hazardOffset, 0, "test setup")
+
+ runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_LinkRefDefinition locks in the B3 fix. A
+// reference link definition followed by a paragraph that uses the
+// reference creates a boundary candidate between the def and the
+// paragraph; rendering them in separate glamour passes loses the
+// definition. The anywhere-in-prefix ref-def check rejects.
+func TestStreamingMarkdown_LinkRefDefinition(t *testing.T) {
+ t.Parallel()
+
+ doc := strings.Join([]string{
+ "Intro paragraph.",
+ "",
+ "[ref]: http://example.com",
+ "",
+ "Trailing paragraph that links to [the example][ref] inline.",
+ }, "\n")
+
+ hazardOffset := strings.Index(doc, "[ref]:")
+ require.Greater(t, hazardOffset, 0, "test setup")
+
+ runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestAssistantStreamingContent_ResetOnClearCache guards the
+// integration contract that ClearItemCaches (style change) drops
+// the streaming-markdown cache. Without it, a style change would
+// leave the OLD style's ANSI sequences embedded in the
+// stable-prefix render, and the next flush would visually mix
+// styles.
+//
+// NOTE(review): the test asserts only the post-clearCache state;
+// it does not check that the streaming cache was populated
+// beforehand, so it would also pass if the cache never advanced.
+func TestAssistantStreamingContent_ResetOnClearCache(t *testing.T) {
+	t.Parallel()
+
+	const (
+		width    = 80
+		id       = "stream-clear"
+		initial  = "Para one.\n\nPara two.\n\nPara three."
+		extended = initial + "\n\nFour."
+	)
+
+	sty := styles.CharmtonePantera()
+	item := NewAssistantMessageItem(&sty, finishedAssistantMessage(id, initial)).(*AssistantMessageItem)
+	_ = item.RawRender(width)
+
+	// Render a second message that extends the content so the
+	// streaming cache has a chance to advance (if it would).
+	item.SetMessage(finishedAssistantMessage(id, extended))
+	_ = item.RawRender(width)
+
+	// Wipe the caches the way ClearItemCaches does.
+	item.clearCache()
+
+	require.Equal(t, "", item.streamingContent.stablePrefix,
+		"clearCache must Reset the streaming-markdown cache")
+	require.Equal(t, "", item.streamingContent.stablePrefixRender)
+	require.Equal(t, 0, item.streamingContent.width)
+}
@@ -0,0 +1,740 @@
+package chat
+
+import (
+ "strings"
+
+ "charm.land/glamour/v2"
+ "github.com/charmbracelet/crush/internal/ui/common"
+)
+
+ // streamingMarkdown caches a "stable prefix" glamour render so each
+ // streaming flush only re-renders the trailing portion of the
+ // document. F8 of docs/notes/2026-05-12-chat-rendering-perf.md.
+ //
+ // The boundary between "stable" and "trailing" is detected by
+ // [findSafeMarkdownBoundary]: a position immediately after a blank
+ // line at which we can prove no markdown construct is open
+ // (fenced code block, list, table, block quote, setext header).
+ //
+ // Two renders concatenated are NOT generally equal to a single
+ // render of the whole document — glamour's wrap state is reset
+ // between calls. The boundary check is therefore deliberately
+ // conservative; whenever it has the slightest doubt the call
+ // falls back to a full render and the cache is left untouched.
+ //
+ // Invariants:
+ //
+ // - stablePrefix is always a literal byte prefix of the most
+ // recently rendered content. If a new content does not have
+ // stablePrefix as its prefix the cache is dropped.
+ // - stablePrefixRender is the glamour render of stablePrefix
+ // alone, with surrounding whitespace trimmed for clean
+ // concatenation.
+ // - width is the glamour wrap width that produced
+ // stablePrefixRender. A width change drops the cache.
+ //
+ // The zero value is an empty cache and is ready to use.
+ type streamingMarkdown struct {
+ 	// width is the wrap width the cached render was produced at.
+ 	width int
+ 	// stablePrefix is the raw markdown covered by the cache; it
+ 	// always ends at a boundary accepted by findSafeMarkdownBoundary.
+ 	stablePrefix string
+ 	// stablePrefixRender is the trimmed glamour output for
+ 	// stablePrefix.
+ 	stablePrefixRender string
+ }
+
+ // Reset returns the cache to its zero state: no width, no stable
+ // prefix and no cached render. The next Render call after Reset
+ // is therefore guaranteed to be a full render.
+ func (s *streamingMarkdown) Reset() {
+ 	*s = streamingMarkdown{}
+ }
+
+ // Render returns the glamour render of content at the given width,
+ // reusing the cached stable-prefix render when it is safe to do so.
+ // On any uncertainty — no provable boundary, or a glamour error
+ // while rendering a new stable chunk — the call falls back to a
+ // full render via renderer and leaves the cache untouched (or
+ // drops it). An error fallback is never written into the cache.
+ //
+ // The returned string has its trailing newline trimmed to match
+ // the existing renderMarkdown contract on AssistantMessageItem.
+ // If the full render itself fails, content is returned unrendered.
+ //
+ // Concurrency: glamour's Render is stateful and not safe for
+ // concurrent invocation on a shared renderer. Crush's TUI is
+ // single-threaded so production never contends, but parallel
+ // callers (most notably the test suite) must serialize. We hold
+ // [common.LockMarkdownRenderer] for the entire prefix +
+ // trailing render sequence so other goroutines cannot interleave
+ // their own Render calls and corrupt goldmark's BlockStack.
+ func (s *streamingMarkdown) Render(content string, width int, renderer *glamour.TermRenderer) string {
+ 	mu := common.LockMarkdownRenderer(renderer)
+ 	mu.Lock()
+ 	defer mu.Unlock()
+
+ 	// full renders the whole document in one pass; it is the
+ 	// fallback for every path that cannot use the cache.
+ 	full := func() string {
+ 		out, err := renderer.Render(content)
+ 		if err != nil {
+ 			return content
+ 		}
+ 		return strings.TrimSuffix(out, "\n")
+ 	}
+
+ 	// Width change OR content not a prefix-extension: drop cache,
+ 	// full render, optionally try to seed a fresh boundary on this
+ 	// call (step "f" in the design note).
+ 	if width != s.width || !strings.HasPrefix(content, s.stablePrefix) {
+ 		s.Reset()
+ 		s.width = width
+ 		out := full()
+ 		s.tryAdvanceFromEmpty(content, width, renderer)
+ 		return out
+ 	}
+
+ 	boundary := findSafeMarkdownBoundary(content)
+ 	if boundary < 0 {
+ 		// No safe boundary anywhere yet. Full render; do not
+ 		// modify the cache (a future flush may find one).
+ 		return full()
+ 	}
+
+ 	if boundary > len(s.stablePrefix) {
+ 		// A NEW chunk of safe content exists past the cached
+ 		// prefix. Render it on its own and promote the boundary,
+ 		// but only if the render succeeded: caching a raw-markdown
+ 		// error fallback would poison every later flush until the
+ 		// next Reset.
+ 		chunk, err := renderer.Render(content[len(s.stablePrefix):boundary])
+ 		if err != nil {
+ 			return full()
+ 		}
+ 		s.stablePrefixRender = glueRenders(s.stablePrefixRender, chunk)
+ 		s.stablePrefix = content[:boundary]
+ 	}
+
+ 	// Whatever follows the (possibly just advanced) stable prefix is
+ 	// rendered fresh and glued on. An empty trail renders to "" and
+ 	// glueRenders then returns the cached prefix render unchanged.
+ 	trail := content[len(s.stablePrefix):]
+ 	return glueRenders(s.stablePrefixRender, s.renderTrailing(trail, renderer))
+ }
+
+ // tryAdvanceFromEmpty seeds an empty cache right after a full
+ // render of content. If content contains a safe boundary, the
+ // prefix up to that boundary is rendered once more and stored so
+ // the next flush can skip the full work. This is the optional
+ // optimisation step "f" from the design note.
+ //
+ // The prefix is rendered separately instead of being carved out of
+ // the full render because two renders concatenated ≠ a single
+ // render of the whole, and the cached prefix render should be
+ // byte-for-byte what a future cached call would produce.
+ //
+ // The cache is left untouched when no boundary is found (or it is
+ // at offset 0) or when the prefix render fails.
+ func (s *streamingMarkdown) tryAdvanceFromEmpty(content string, width int, renderer *glamour.TermRenderer) {
+ 	cut := findSafeMarkdownBoundary(content)
+ 	if cut <= 0 {
+ 		return
+ 	}
+ 	stable := content[:cut]
+ 	rendered, err := renderer.Render(stable)
+ 	if err != nil {
+ 		return
+ 	}
+ 	s.width = width
+ 	s.stablePrefix = stable
+ 	s.stablePrefixRender = trimGlamourMargins(rendered)
+ }
+
+ // renderTrailing renders text as a standalone glamour document and
+ // strips the surrounding whitespace so the result can be appended
+ // to a cached prefix render without doubling blank lines.
+ //
+ // Empty text yields "" without touching the renderer. If glamour
+ // reports an error the raw text is returned unchanged.
+ func (s *streamingMarkdown) renderTrailing(text string, renderer *glamour.TermRenderer) string {
+ 	if len(text) == 0 {
+ 		return ""
+ 	}
+ 	rendered, err := renderer.Render(text)
+ 	if err == nil {
+ 		return trimGlamourMargins(rendered)
+ 	}
+ 	return text
+ }
+
+ // glueRenders joins two glamour-rendered fragments with exactly
+ // one blank line between them. Both fragments are first stripped
+ // of their own margins, so the seam never shows a doubled gap.
+ //
+ // Either fragment may be empty (after trimming): the other one is
+ // then returned on its own, and two empty fragments give "".
+ func glueRenders(prefix, trail string) string {
+ 	head, tail := trimGlamourMargins(prefix), trimGlamourMargins(trail)
+ 	if head == "" {
+ 		// Also covers the both-empty case, where tail is "".
+ 		return tail
+ 	}
+ 	if tail == "" {
+ 		return head
+ 	}
+ 	return head + "\n\n" + tail
+ }
+
+// trimGlamourMargins strips leading and trailing whitespace
+// (including newlines) from a glamour-rendered fragment.
+// Glamour adds a leading blank line for documents that open with
+// a heading or paragraph, plus a trailing newline; both must be
+// removed before concatenation.
+func trimGlamourMargins(s string) string {
+ return strings.Trim(s, " \t\n")
+}
+
+ // findSafeMarkdownBoundary returns the byte offset of the latest
+ // safe boundary in content, i.e. the largest offset p such that
+ // content[:p] is a valid stable-prefix candidate. The offset always
+ // points at the first byte AFTER a blank-line separator (the start
+ // of the trailing segment), so a fresh render of content[p:] can be
+ // concatenated to a cached render of content[:p] without glamour
+ // sharing state across the cut.
+ //
+ // Returns -1 when no safe boundary exists. SAFETY FIRST: on the
+ // slightest doubt we return -1 and the caller falls back to a full
+ // render.
+ //
+ // Candidates are the blank-line positions reported by
+ // [blankLineBefore], visited from latest to earliest; the first one
+ // accepted by [isSafeBoundaryAt] wins. A candidate is rejected when:
+ //
+ // 1. content[:p] has an odd number of fence lines (a fenced code
+ // block is still open, so the trailing partial would be
+ // mis-highlighted).
+ // 2. any line of content[:p] outside fenced blocks is a list-marker
+ // line, an HTML-block opener, or a link reference definition
+ // (see [prefixHasOpenHazard]).
+ // 3. the last non-blank line of content[:p] is a list item, a table
+ // line (contains "|"), a block quote, a setext underline, or an
+ // indented code line (4+ leading spaces or a tab).
+ // 4. the first non-blank line AFTER p looks like a setext underline
+ // (only '=' or only '-'). Rendering the prefix as a paragraph
+ // would change once the underline arrived; that is exactly the
+ // "splitting changes the prefix render" hazard §4.4 calls out.
+ func findSafeMarkdownBoundary(content string) int {
+ 	// blankLineBefore yields -1 for empty content, so no separate
+ 	// emptiness guard is needed. Offset 0 is never a boundary.
+ 	p := blankLineBefore(content, len(content))
+ 	for p > 0 {
+ 		if isSafeBoundaryAt(content, p) {
+ 			return p
+ 		}
+ 		// Step to the next-earlier separator: p-1 is the newline
+ 		// that terminated the one just rejected.
+ 		p = blankLineBefore(content, p-1)
+ 	}
+ 	return -1
+ }
+
+ // blankLineBefore returns the byte offset of the first character
+ // AFTER the latest blank-line separator whose final newline sits
+ // strictly before `until`. A blank-line separator is a sequence
+ // "\n([ \t]*\n)+" — one newline, then one or more lines containing
+ // only spaces or tabs and terminated by another newline. The
+ // returned offset is the start of the line that follows the
+ // separator (or the position immediately after the final newline,
+ // if no further content remains); it may equal `until`.
+ //
+ // Returns -1 when no blank-line separator exists before `until`,
+ // including when until <= 0. Values of `until` beyond len(content)
+ // are clamped to len(content).
+ func blankLineBefore(content string, until int) int {
+ 	if until > len(content) {
+ 		until = len(content)
+ 	}
+ 	if until <= 0 {
+ 		return -1
+ 	}
+ 	// Walk the newlines backward in pairs (prev, nl). When only
+ 	// spaces/tabs lie between the two, the line between them is
+ 	// blank and the boundary sits right after nl. Otherwise prev
+ 	// becomes the next candidate, so every newline is located once.
+ 	nl := strings.LastIndexByte(content[:until], '\n')
+ 	for nl >= 0 {
+ 		prev := strings.LastIndexByte(content[:nl], '\n')
+ 		if prev < 0 {
+ 			// nl is the first newline of the document; a blank
+ 			// first line is not a separator.
+ 			return -1
+ 		}
+ 		if isBlankOrSpaces(content[prev+1 : nl]) {
+ 			return nl + 1
+ 		}
+ 		nl = prev
+ 	}
+ 	return -1
+ }
+
+ // isBlankOrSpaces reports whether s is empty or made up solely of
+ // ASCII spaces and tabs. Newlines, carriage returns and Unicode
+ // spaces all count as content.
+ func isBlankOrSpaces(s string) bool {
+ 	return strings.TrimLeft(s, " \t") == ""
+ }
+
+ // isSafeBoundaryAt reports whether content[:p] may be used as a
+ // stable prefix. p must be a blank-line boundary (start of a line,
+ // with a blank line immediately preceding).
+ //
+ // The checks run in this order and the first failure rejects:
+ //
+ // - (2) the prefix has an odd number of fence lines;
+ // - (2b) the prefix contains an anywhere-in-prefix hazard: open
+ // list (B1), HTML block opener (B2) or reference link
+ // definition (B3) — these cannot be judged from the last line
+ // alone, see prefixHasOpenHazard;
+ // - (3) the last non-blank line of the prefix keeps a construct
+ // open;
+ // - (4) the first non-blank line after p looks like a setext
+ // underline, which would retroactively turn the last paragraph
+ // of the prefix into a header.
+ func isSafeBoundaryAt(content string, p int) bool {
+ 	prefix, rest := content[:p], content[p:]
+
+ 	switch {
+ 	case countFenceLines(prefix)%2 != 0:
+ 		return false
+ 	case prefixHasOpenHazard(prefix):
+ 		return false
+ 	}
+
+ 	if last := lastNonBlankLine(prefix); last != "" && lineOpensConstruct(last) {
+ 		return false
+ 	}
+
+ 	if rest == "" {
+ 		// Nothing follows yet, so there is no underline to fear.
+ 		return true
+ 	}
+ 	return !isSetextUnderlineCandidate(firstNonBlankLine(rest))
+ }
+
+// prefixHasOpenHazard reports whether prefix contains any of three
+// constructs that cannot be safely cut at a blank-line boundary
+// even when the immediately preceding line looks fine. Each check
+// uses the SIMPLEST viable conservative rule per the F8 round-2
+// review:
+//
+// B1 (loose lists). A loose list has a blank line between an item
+// and a continuation paragraph that begins with indentation
+// but no list marker. If a candidate boundary lands on that
+// blank line, the prefix's trailing non-blank line is the
+// continuation paragraph, NOT a list marker, so the last-line
+// check would accept it even though the list is still open.
+//
+// Rule chosen: any list-marker line ANYWHERE in the prefix
+// forces -1. This is overly conservative β it forfeits
+// boundary advancement past a closed list β but it eliminates
+// the entire bug class with zero parsing of CommonMark's
+// loose-list closure semantics. We retain the most useful
+// boundary in practice: the one BEFORE the list opens (no
+// marker has appeared in the prefix yet).
+//
+// B2 (HTML blocks). CommonMark defines seven HTML-block opener
+// patterns (script/pre/style/textarea, comments, processing
+// instructions, CDATA, declarations, recognised tag names).
+// If the prefix opens an HTML block that the suffix closes,
+// splitting renders the prefix as raw HTML and the suffix as
+// prose.
+//
+// Rule chosen: any HTML-block opener anywhere in the prefix
+// forces -1. Same trade-off as B1 β the typical assistant
+// output contains no raw HTML, so the perf cost is zero in
+// the common case.
+//
+// B3 (reference link definitions). A line of the form
+// "[label]: <url>" defines a link reference that the suffix
+// may later use as "[text][label]". Splitting the document
+// loses the definition because each half is rendered as an
+// independent glamour document.
+//
+// Rule chosen: any reference link definition line anywhere in
+// the prefix forces -1. Suffix-side reference detection is
+// fragile (three syntaxes: [text][label], [label][], [label]),
+// so the prefix-side check is the simpler safe choice.
+//
+// All three rules accept the perf hit of "no boundary after a
+// list / HTML block / link def" in exchange for guaranteed
+// soundness. If profiling shows this kills the F8 win on real
+// streaming traces, the next iteration can promote each rule to
+// its less-conservative variant (closure-aware list tracking,
+// per-tag HTML close detection, suffix-aware ref tracking).
+func prefixHasOpenHazard(prefix string) bool {
+ inFence := false
+ for line := range splitLines(prefix) {
+ // Track fenced state so list/html/ref patterns inside a
+ // fenced code block do not falsely trigger the hazards.
+ if isFenceLine(line) {
+ inFence = !inFence
+ continue
+ }
+ if inFence {
+ continue
+ }
+ trimmed := strings.TrimLeft(line, " \t")
+ if trimmed == "" {
+ continue
+ }
+ // B1: any list-item marker.
+ if isListItemMarker(trimmed) {
+ return true
+ }
+ // B2: HTML block opener.
+ if isHTMLBlockOpener(line) {
+ return true
+ }
+ // B3: link reference definition.
+ if isLinkRefDefinition(line) {
+ return true
+ }
+ }
+ return false
+}
+
+// countFenceLines counts lines that begin a fenced code block in
+// the CommonMark sense: a line whose first non-whitespace run is
+// at least three consecutive backticks (or tildes). Each such
+// line toggles the fenced state, so an even count means every
+// opened fence has been closed.
+//
+// We accept up to three leading spaces of indentation (CommonMark
+// rule) and require the fence characters to be the FIRST
+// non-whitespace content of the line. We deliberately do NOT
+// attempt to parse info-strings or differentiate opener from
+// closer beyond toggling β a closing fence is just any line
+// whose first non-whitespace run is >=3 of the same fence char.
+func countFenceLines(s string) int {
+ n := 0
+ for line := range splitLines(s) {
+ if isFenceLine(line) {
+ n++
+ }
+ }
+ return n
+}
+
+// isFenceLine reports whether line opens or closes a fenced code
+// block.
+func isFenceLine(line string) bool {
+ // Strip up to 3 spaces of indentation.
+ i := 0
+ for i < len(line) && i < 3 && line[i] == ' ' {
+ i++
+ }
+ if i >= len(line) {
+ return false
+ }
+ c := line[i]
+ if c != '`' && c != '~' {
+ return false
+ }
+ run := 0
+ for i < len(line) && line[i] == c {
+ i++
+ run++
+ }
+ return run >= 3
+}
+
+ // lastNonBlankLine returns the final line of s that still has
+ // content once surrounding whitespace is ignored, without its "\n"
+ // terminator. It returns "" when s is empty or every line is blank.
+ func lastNonBlankLine(s string) string {
+ 	// Scan lines from the end so the search stops at the first hit.
+ 	for end := len(s); end > 0; {
+ 		start := strings.LastIndexByte(s[:end], '\n') + 1
+ 		if line := s[start:end]; strings.TrimSpace(line) != "" {
+ 			return line
+ 		}
+ 		// Step over the newline that precedes this blank line.
+ 		end = start - 1
+ 	}
+ 	return ""
+ }
+
+ // firstNonBlankLine returns the earliest line of s that still has
+ // content once surrounding whitespace is ignored, without its "\n"
+ // terminator. It returns "" when s is empty or every line is blank.
+ func firstNonBlankLine(s string) string {
+ 	for rest := s; rest != ""; {
+ 		line, tail, _ := strings.Cut(rest, "\n")
+ 		if strings.TrimSpace(line) != "" {
+ 			return line
+ 		}
+ 		rest = tail
+ 	}
+ 	return ""
+ }
+
+ // splitLines returns an iterator over the lines of s, split on
+ // "\n" with the terminator removed. A final segment that is not
+ // newline-terminated is yielded as well; an empty final segment
+ // (s is empty or ends in "\n") is not. Iteration stops as soon as
+ // yield returns false.
+ func splitLines(s string) func(yield func(string) bool) {
+ 	return func(yield func(string) bool) {
+ 		rest := s
+ 		for {
+ 			nl := strings.IndexByte(rest, '\n')
+ 			if nl < 0 {
+ 				break
+ 			}
+ 			if !yield(rest[:nl]) {
+ 				return
+ 			}
+ 			rest = rest[nl+1:]
+ 		}
+ 		if rest != "" {
+ 			yield(rest)
+ 		}
+ 	}
+ }
+
+ // lineOpensConstruct reports whether line, taken as the last
+ // non-blank line of a prefix, keeps a markdown construct open
+ // across the boundary. We err conservatively — any case that
+ // smells like indented code, block quote, list, table or setext
+ // underline returns true.
+ func lineOpensConstruct(line string) bool {
+ 	// Indented code: leading whitespace reaching column 4. A tab
+ 	// advances to the next multiple of four (CommonMark tab stops),
+ 	// so a lone tab or "  \t" qualify just like four spaces do,
+ 	// while one to three spaces of ordinary indentation do not.
+ 	col := 0
+ 	for i := 0; i < len(line) && col < 4; i++ {
+ 		if line[i] == ' ' {
+ 			col++
+ 		} else if line[i] == '\t' {
+ 			col += 4 - col%4
+ 		} else {
+ 			break
+ 		}
+ 	}
+ 	if col >= 4 {
+ 		return true
+ 	}
+
+ 	trimmed := strings.TrimLeft(line, " \t")
+ 	if trimmed == "" {
+ 		return false
+ 	}
+
+ 	// Block quote.
+ 	if trimmed[0] == '>' {
+ 		return true
+ 	}
+
+ 	// List item: see isListItemMarker for the accepted markers.
+ 	if isListItemMarker(trimmed) {
+ 		return true
+ 	}
+
+ 	// Table: any pipe character anywhere in the line. Conservative:
+ 	// pipe-in-prose is rare and the cost of bailing is one slow
+ 	// frame.
+ 	if strings.ContainsRune(line, '|') {
+ 		return true
+ 	}
+
+ 	// Setext underline candidate as the LAST line of the prefix:
+ 	// this would be a setext header for an even-earlier paragraph,
+ 	// so the boundary would sit in the middle of a header.
+ 	return isSetextUnderlineCandidate(trimmed)
+ }
+
+ // isListItemMarker reports whether line (already left-trimmed)
+ // starts with a CommonMark list-item marker: a bullet ("-", "*",
+ // "+") or one to nine digits followed by "." or ")".
+ //
+ // The marker must be followed by a space or tab, or be the whole
+ // line. A bare marker is an EMPTY list item in CommonMark whose
+ // content may start on the next line and continue after a blank
+ // line, so it opens a list just like "- text" does and must be
+ // treated as a hazard.
+ func isListItemMarker(line string) bool {
+ 	if line == "" {
+ 		return false
+ 	}
+ 	width := 0 // number of bytes taken by the marker
+ 	switch line[0] {
+ 	case '-', '*', '+':
+ 		width = 1
+ 	default:
+ 		// Ordered list: 1-9 digits, then '.' or ')'.
+ 		for width < len(line) && line[width] >= '0' && line[width] <= '9' {
+ 			width++
+ 		}
+ 		if width == 0 || width > 9 || width == len(line) {
+ 			return false
+ 		}
+ 		if line[width] != '.' && line[width] != ')' {
+ 			return false
+ 		}
+ 		width++
+ 	}
+ 	if width == len(line) {
+ 		// Bare marker at end of line: empty list item.
+ 		return true
+ 	}
+ 	return line[width] == ' ' || line[width] == '\t'
+ }
+
+ // isSetextUnderlineCandidate reports whether line, once leading
+ // and trailing spaces/tabs are ignored, is a non-empty run made
+ // only of '=' or only of '-'. Any amount of leading whitespace is
+ // tolerated — more than CommonMark allows — so an indented
+ // underline still blocks a split. A single "=" or "-" qualifies.
+ func isSetextUnderlineCandidate(line string) bool {
+ 	core := strings.Trim(line, " \t")
+ 	if core == "" {
+ 		return false
+ 	}
+ 	mark := core[0]
+ 	if mark != '=' && mark != '-' {
+ 		return false
+ 	}
+ 	// Every byte of the core must be the same underline character.
+ 	return strings.Count(core, string(mark)) == len(core)
+ }
+
+ // isHTMLBlockOpener reports whether line begins one of the seven
+ // CommonMark HTML block patterns. Up to three spaces of leading
+ // indentation are accepted (CommonMark rule). Matching is
+ // intentionally loose — we only need to know the line "looks like
+ // an HTML block start", not parse the contained markup.
+ //
+ // After the indentation the line must start with one of:
+ //
+ // - "<?" (type 3, processing instruction)
+ // - "<!--" (type 2, comment)
+ // - "<![CDATA[" (type 5)
+ // - "<!" + ASCII letter (type 4, declaration)
+ // - "<" or "</" + ASCII letter (types 1, 6 and 7: any open or
+ // close tag, which includes script/pre/style/textarea)
+ //
+ // Lines like "<3", "<-", "<<" or a mid-line "<foo>" do NOT match.
+ // When in doubt the boundary is forfeited, as with the other
+ // hazards.
+ func isHTMLBlockOpener(line string) bool {
+ 	// Strip up to 3 spaces of indentation.
+ 	i := 0
+ 	for i < len(line) && i < 3 && line[i] == ' ' {
+ 		i++
+ 	}
+ 	rest := line[i:]
+ 	if len(rest) < 2 || rest[0] != '<' {
+ 		return false
+ 	}
+
+ 	switch rest[1] {
+ 	case '?':
+ 		return true
+ 	case '!':
+ 		return strings.HasPrefix(rest, "<!--") ||
+ 			strings.HasPrefix(rest, "<![CDATA[") ||
+ 			(len(rest) >= 3 && isASCIILetter(rest[2]))
+ 	case '/':
+ 		return len(rest) >= 3 && isASCIILetter(rest[2])
+ 	default:
+ 		// No separate script/pre/style/textarea test is needed:
+ 		// those names start with a letter and are caught here.
+ 		return isASCIILetter(rest[1])
+ 	}
+ }
+
+ // isASCIILetter reports whether b is one of 'a'-'z' or 'A'-'Z'.
+ func isASCIILetter(b byte) bool {
+ 	// Setting bit 0x20 folds 'A'-'Z' onto 'a'-'z'; no other byte
+ 	// lands in that range.
+ 	lower := b | 0x20
+ 	return 'a' <= lower && lower <= 'z'
+ }
+
+ // isLinkRefDefinition reports whether line looks like the opening
+ // of a CommonMark link reference definition. The conservative
+ // pattern is:
+ //
+ // ^[ ]{0,3}\[(\\.|[^\]])+\]:
+ //
+ // i.e. up to 3 spaces, a non-empty bracketed label in which a
+ // backslash escapes the next byte (so "\]" does not end the
+ // label), and a colon directly after the closing bracket.
+ //
+ // Nothing after the colon is inspected: CommonMark allows the
+ // destination to sit on the following line, so "[label]:" alone
+ // is already a possible opener. We do not validate the destination
+ // — presence of a ref-def opener anywhere in the prefix is enough
+ // to forfeit the boundary.
+ func isLinkRefDefinition(line string) bool {
+ 	i := 0
+ 	for i < len(line) && i < 3 && line[i] == ' ' {
+ 		i++
+ 	}
+ 	if i >= len(line) || line[i] != '[' {
+ 		return false
+ 	}
+ 	i++
+ 	labelStart := i
+ 	for i < len(line) && line[i] != ']' {
+ 		if line[i] == '\\' && i+1 < len(line) {
+ 			// Skip the escaped byte so "\]" stays inside the label.
+ 			i++
+ 		}
+ 		i++
+ 	}
+ 	if i >= len(line) || i == labelStart {
+ 		// No closing bracket, or empty label.
+ 		return false
+ 	}
+ 	// i points at ']'; the colon must follow immediately.
+ 	i++
+ 	return i < len(line) && line[i] == ':'
+ }