markdown: Track code block metadata in parser (#28543)

Bennet Bo Fenner created

This allows us to not scan the codeblock content for newlines on every
frame in `active_thread`

Release Notes:

- N/A

Change summary

crates/agent/src/active_thread.rs | 119 ++++++++--------
crates/markdown/src/markdown.rs   |  96 ++++++-------
crates/markdown/src/parser.rs     | 235 ++++++++++++++++++++------------
3 files changed, 247 insertions(+), 203 deletions(-)

Detailed changes

crates/agent/src/active_thread.rs 🔗

@@ -22,12 +22,11 @@ use gpui::{
 };
 use language::{Buffer, LanguageRegistry};
 use language_model::{LanguageModelRegistry, LanguageModelToolUseId, Role};
-use markdown::parser::CodeBlockKind;
-use markdown::{Markdown, MarkdownElement, MarkdownStyle, ParsedMarkdown, without_fences};
+use markdown::parser::{CodeBlockKind, CodeBlockMetadata};
+use markdown::{Markdown, MarkdownElement, MarkdownStyle, ParsedMarkdown};
 use project::ProjectItem as _;
 use rope::Point;
 use settings::{Settings as _, update_settings_file};
-use std::ops::Range;
 use std::path::Path;
 use std::rc::Rc;
 use std::sync::Arc;
@@ -66,6 +65,8 @@ pub struct ActiveThread {
     open_feedback_editors: HashMap<MessageId, Entity<Editor>>,
 }
 
+const MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK: usize = 5;
+
 struct RenderedMessage {
     language_registry: Arc<LanguageRegistry>,
     segments: Vec<RenderedMessageSegment>,
@@ -295,7 +296,7 @@ fn render_markdown_code_block(
     ix: usize,
     kind: &CodeBlockKind,
     parsed_markdown: &ParsedMarkdown,
-    codeblock_range: Range<usize>,
+    metadata: CodeBlockMetadata,
     active_thread: Entity<ActiveThread>,
     workspace: WeakEntity<Workspace>,
     _window: &Window,
@@ -467,15 +468,6 @@ fn render_markdown_code_block(
         .element_background
         .blend(cx.theme().colors().editor_foreground.opacity(0.01));
 
-    const CODE_FENCES_LINE_COUNT: usize = 2;
-    const MAX_COLLAPSED_LINES: usize = 5;
-
-    let line_count = parsed_markdown.source()[codeblock_range.clone()]
-        .bytes()
-        .filter(|c| *c == b'\n')
-        .count()
-        .saturating_sub(CODE_FENCES_LINE_COUNT - 1);
-
     let codeblock_header = h_flex()
         .group("codeblock_header")
         .p_1()
@@ -505,16 +497,14 @@ fn render_markdown_code_block(
                         .on_click({
                             let active_thread = active_thread.clone();
                             let parsed_markdown = parsed_markdown.clone();
+                            let code_block_range = metadata.content_range.clone();
                             move |_event, _window, cx| {
                                 active_thread.update(cx, |this, cx| {
                                     this.copied_code_block_ids.insert((message_id, ix));
 
-                                    let code = without_fences(
-                                        &parsed_markdown.source()[codeblock_range.clone()],
-                                    )
-                                    .to_string();
-
-                                    cx.write_to_clipboard(ClipboardItem::new_string(code.clone()));
+                                    let code = parsed_markdown.source()[code_block_range.clone()]
+                                        .to_string();
+                                    cx.write_to_clipboard(ClipboardItem::new_string(code));
 
                                     cx.spawn(async move |this, cx| {
                                         cx.background_executor()
@@ -536,38 +526,41 @@ fn render_markdown_code_block(
                         }),
                     ),
                 )
-                .when(line_count > MAX_COLLAPSED_LINES, |header| {
-                    header.child(
-                        IconButton::new(
-                            ("expand-collapse-code", ix),
-                            if is_expanded {
-                                IconName::ChevronUp
+                .when(
+                    metadata.line_count > MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK,
+                    |header| {
+                        header.child(
+                            IconButton::new(
+                                ("expand-collapse-code", ix),
+                                if is_expanded {
+                                    IconName::ChevronUp
+                                } else {
+                                    IconName::ChevronDown
+                                },
+                            )
+                            .icon_color(Color::Muted)
+                            .shape(ui::IconButtonShape::Square)
+                            .tooltip(Tooltip::text(if is_expanded {
+                                "Collapse Code"
                             } else {
-                                IconName::ChevronDown
-                            },
+                                "Expand Code"
+                            }))
+                            .on_click({
+                                let active_thread = active_thread.clone();
+                                move |_event, _window, cx| {
+                                    active_thread.update(cx, |this, cx| {
+                                        let is_expanded = this
+                                            .expanded_code_blocks
+                                            .entry((message_id, ix))
+                                            .or_insert(false);
+                                        *is_expanded = !*is_expanded;
+                                        cx.notify();
+                                    });
+                                }
+                            }),
                         )
-                        .icon_color(Color::Muted)
-                        .shape(ui::IconButtonShape::Square)
-                        .tooltip(Tooltip::text(if is_expanded {
-                            "Collapse Code"
-                        } else {
-                            "Expand Code"
-                        }))
-                        .on_click({
-                            let active_thread = active_thread.clone();
-                            move |_event, _window, cx| {
-                                active_thread.update(cx, |this, cx| {
-                                    let is_expanded = this
-                                        .expanded_code_blocks
-                                        .entry((message_id, ix))
-                                        .or_insert(false);
-                                    *is_expanded = !*is_expanded;
-                                    cx.notify();
-                                });
-                            }
-                        }),
-                    )
-                }),
+                    },
+                ),
         );
 
     v_flex()
@@ -578,13 +571,16 @@ fn render_markdown_code_block(
         .border_color(cx.theme().colors().border_variant)
         .bg(cx.theme().colors().editor_background)
         .child(codeblock_header)
-        .when(line_count > MAX_COLLAPSED_LINES, |this| {
-            if is_expanded {
-                this.h_full()
-            } else {
-                this.max_h_40()
-            }
-        })
+        .when(
+            metadata.line_count > MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK,
+            |this| {
+                if is_expanded {
+                    this.h_full()
+                } else {
+                    this.max_h_40()
+                }
+            },
+        )
 }
 
 fn open_markdown_link(
@@ -1896,13 +1892,13 @@ impl ActiveThread {
                                     render: Arc::new({
                                         let workspace = workspace.clone();
                                         let active_thread = cx.entity();
-                                        move |kind, parsed_markdown, range, window, cx| {
+                                        move |kind, parsed_markdown, range, metadata, window, cx| {
                                             render_markdown_code_block(
                                                 message_id,
                                                 range.start,
                                                 kind,
                                                 parsed_markdown,
-                                                range,
+                                                metadata,
                                                 active_thread.clone(),
                                                 workspace.clone(),
                                                 window,
@@ -1912,7 +1908,7 @@ impl ActiveThread {
                                     }),
                                     transform: Some(Arc::new({
                                         let active_thread = cx.entity();
-                                        move |el, range, _, cx| {
+                                        move |el, range, metadata, _, cx| {
                                             let is_expanded = active_thread
                                                 .read(cx)
                                                 .expanded_code_blocks
@@ -1920,7 +1916,10 @@ impl ActiveThread {
                                                 .copied()
                                                 .unwrap_or(false);
 
-                                            if is_expanded {
+                                            if is_expanded
+                                                || metadata.line_count
+                                                    <= MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK
+                                            {
                                                 return el;
                                             }
                                             el.child(

crates/markdown/src/markdown.rs 🔗

@@ -18,6 +18,7 @@ use gpui::{
     TextStyleRefinement, actions, point, quad,
 };
 use language::{Language, LanguageRegistry, Rope};
+use parser::CodeBlockMetadata;
 use parser::{MarkdownEvent, MarkdownTag, MarkdownTagEnd, parse_links_only, parse_markdown};
 use pulldown_cmark::Alignment;
 use sum_tree::TreeMap;
@@ -99,10 +100,19 @@ pub enum CodeBlockRenderer {
     },
 }
 
-pub type CodeBlockRenderFn =
-    Arc<dyn Fn(&CodeBlockKind, &ParsedMarkdown, Range<usize>, &mut Window, &App) -> Div>;
+pub type CodeBlockRenderFn = Arc<
+    dyn Fn(
+        &CodeBlockKind,
+        &ParsedMarkdown,
+        Range<usize>,
+        CodeBlockMetadata,
+        &mut Window,
+        &App,
+    ) -> Div,
+>;
 
-pub type CodeBlockTransformFn = Arc<dyn Fn(AnyDiv, Range<usize>, &mut Window, &App) -> AnyDiv>;
+pub type CodeBlockTransformFn =
+    Arc<dyn Fn(AnyDiv, Range<usize>, CodeBlockMetadata, &mut Window, &App) -> AnyDiv>;
 
 actions!(markdown, [Copy, CopyAsMarkdown]);
 
@@ -603,6 +613,8 @@ impl Element for MarkdownElement {
             0
         };
 
+        let mut current_code_block_metadata = None;
+
         for (range, event) in parsed_markdown.events.iter() {
             match event {
                 MarkdownEvent::Start(tag) => {
@@ -641,7 +653,7 @@ impl Element for MarkdownElement {
                                 markdown_end,
                             );
                         }
-                        MarkdownTag::CodeBlock(kind) => {
+                        MarkdownTag::CodeBlock { kind, metadata } => {
                             let language = match kind {
                                 CodeBlockKind::Fenced => None,
                                 CodeBlockKind::FencedLang(language) => {
@@ -654,6 +666,8 @@ impl Element for MarkdownElement {
                                 _ => None,
                             };
 
+                            current_code_block_metadata = Some(metadata.clone());
+
                             let is_indented = matches!(kind, CodeBlockKind::Indented);
 
                             match (&self.code_block_renderer, is_indented) {
@@ -686,8 +700,14 @@ impl Element for MarkdownElement {
                                     builder.push_div(code_block, range, markdown_end);
                                 }
                                 (CodeBlockRenderer::Custom { render, .. }, _) => {
-                                    let parent_container =
-                                        render(kind, &parsed_markdown, range.clone(), window, cx);
+                                    let parent_container = render(
+                                        kind,
+                                        &parsed_markdown,
+                                        range.clone(),
+                                        metadata.clone(),
+                                        window,
+                                        cx,
+                                    );
 
                                     builder.push_div(parent_container, range, markdown_end);
 
@@ -852,12 +872,22 @@ impl Element for MarkdownElement {
                             builder.pop_text_style();
                         }
 
+                        let metadata = current_code_block_metadata.take();
+
                         if let CodeBlockRenderer::Custom {
-                            transform: Some(modify),
+                            transform: Some(transform),
                             ..
                         } = &self.code_block_renderer
                         {
-                            builder.modify_current_div(|el| modify(el, range.clone(), window, cx));
+                            builder.modify_current_div(|el| {
+                                transform(
+                                    el,
+                                    range.clone(),
+                                    metadata.clone().unwrap_or_default(),
+                                    window,
+                                    cx,
+                                )
+                            });
                         }
 
                         if matches!(
@@ -866,9 +896,13 @@ impl Element for MarkdownElement {
                         ) {
                             builder.flush_text();
                             builder.modify_current_div(|el| {
-                                let code =
-                                    without_fences(parsed_markdown.source()[range.clone()].trim())
-                                        .to_string();
+                                let content_range = parser::extract_code_block_content_range(
+                                    parsed_markdown.source()[range.clone()].trim(),
+                                );
+                                let content_range = content_range.start + range.start
+                                    ..content_range.end + range.start;
+
+                                let code = parsed_markdown.source()[content_range].to_string();
                                 let codeblock = render_copy_code_block_button(
                                     range.end,
                                     code,
@@ -1507,43 +1541,3 @@ impl RenderedText {
             .find(|link| link.source_range.contains(&source_index))
     }
 }
-
-/// Some markdown blocks are indented, and others have e.g. ```rust … ``` around them.
-/// If this block is fenced with backticks, strip them off (and the language name).
-/// We use this when copying code blocks to the clipboard.
-pub fn without_fences(mut markdown: &str) -> &str {
-    if let Some(opening_backticks) = markdown.find("```") {
-        markdown = &markdown[opening_backticks..];
-
-        // Trim off the next newline. This also trims off a language name if it's there.
-        if let Some(newline) = markdown.find('\n') {
-            markdown = &markdown[newline + 1..];
-        }
-    };
-
-    if let Some(closing_backticks) = markdown.rfind("```") {
-        markdown = &markdown[..closing_backticks];
-    };
-
-    markdown
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_without_fences() {
-        let input = "```rust\nlet x = 5;\n```";
-        assert_eq!(without_fences(input), "let x = 5;\n");
-
-        let input = "   ```\nno language\n```   ";
-        assert_eq!(without_fences(input), "no language\n");
-
-        let input = "plain text";
-        assert_eq!(without_fences(input), "plain text");
-
-        let input = "```python\nprint('hello')\nprint('world')\n```";
-        assert_eq!(without_fences(input), "print('hello')\nprint('world')\n");
-    }
-}

crates/markdown/src/parser.rs 🔗

@@ -65,11 +65,33 @@ pub fn parse_markdown(
                         within_metadata = true;
                         MarkdownTag::MetadataBlock(kind)
                     }
+                    pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Indented) => {
+                        MarkdownTag::CodeBlock {
+                            kind: CodeBlockKind::Indented,
+                            metadata: CodeBlockMetadata {
+                                content_range: range.start + 1..range.end + 1,
+                                line_count: 1,
+                            },
+                        }
+                    }
                     pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
                         ref info,
                     )) => {
+                        let content_range = extract_code_block_content_range(&text[range.clone()]);
+                        let content_range =
+                            content_range.start + range.start..content_range.end + range.start;
+
+                        let line_count = text[content_range.clone()]
+                            .bytes()
+                            .filter(|c| *c == b'\n')
+                            .count();
+                        let metadata = CodeBlockMetadata {
+                            content_range,
+                            line_count,
+                        };
+
                         let info = info.trim();
-                        MarkdownTag::CodeBlock(if info.is_empty() {
+                        let kind = if info.is_empty() {
                             CodeBlockKind::Fenced
                             // Languages should never contain a slash, and PathRanges always should.
                             // (Models are told to specify them relative to a workspace root.)
@@ -81,9 +103,68 @@ pub fn parse_markdown(
                             let language = SharedString::from(info.to_string());
                             language_names.insert(language.clone());
                             CodeBlockKind::FencedLang(language)
-                        })
+                        };
+
+                        MarkdownTag::CodeBlock { kind, metadata }
+                    }
+                    pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
+                    pulldown_cmark::Tag::Heading {
+                        level,
+                        id,
+                        classes,
+                        attrs,
+                    } => {
+                        let id = id.map(|id| SharedString::from(id.into_string()));
+                        let classes = classes
+                            .into_iter()
+                            .map(|c| SharedString::from(c.into_string()))
+                            .collect();
+                        let attrs = attrs
+                            .into_iter()
+                            .map(|(key, value)| {
+                                (
+                                    SharedString::from(key.into_string()),
+                                    value.map(|v| SharedString::from(v.into_string())),
+                                )
+                            })
+                            .collect();
+                        MarkdownTag::Heading {
+                            level,
+                            id,
+                            classes,
+                            attrs,
+                        }
+                    }
+                    pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
+                    pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
+                    pulldown_cmark::Tag::Item => MarkdownTag::Item,
+                    pulldown_cmark::Tag::FootnoteDefinition(label) => {
+                        MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
+                    }
+                    pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
+                    pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
+                    pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
+                    pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
+                    pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
+                    pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
+                    pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
+                    pulldown_cmark::Tag::Image {
+                        link_type,
+                        dest_url,
+                        title,
+                        id,
+                    } => MarkdownTag::Image {
+                        link_type,
+                        dest_url: SharedString::from(dest_url.into_string()),
+                        title: SharedString::from(title.into_string()),
+                        id: SharedString::from(id.into_string()),
+                    },
+                    pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
+                    pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
+                    pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
+                    pulldown_cmark::Tag::DefinitionListDefinition => {
+                        MarkdownTag::DefinitionListDefinition
                     }
-                    tag => tag.into(),
                 };
                 events.push((range, MarkdownEvent::Start(tag)))
             }
@@ -252,7 +333,10 @@ pub enum MarkdownTag {
     BlockQuote,
 
     /// A code block.
-    CodeBlock(CodeBlockKind),
+    CodeBlock {
+        kind: CodeBlockKind,
+        metadata: CodeBlockMetadata,
+    },
 
     /// A HTML block.
     HtmlBlock,
@@ -323,96 +407,26 @@ pub enum CodeBlockKind {
     FencedSrc(PathWithRange),
 }
 
-impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
-    fn from(tag: pulldown_cmark::Tag) -> Self {
-        match tag {
-            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
-            pulldown_cmark::Tag::Heading {
-                level,
-                id,
-                classes,
-                attrs,
-            } => {
-                let id = id.map(|id| SharedString::from(id.into_string()));
-                let classes = classes
-                    .into_iter()
-                    .map(|c| SharedString::from(c.into_string()))
-                    .collect();
-                let attrs = attrs
-                    .into_iter()
-                    .map(|(key, value)| {
-                        (
-                            SharedString::from(key.into_string()),
-                            value.map(|v| SharedString::from(v.into_string())),
-                        )
-                    })
-                    .collect();
-                MarkdownTag::Heading {
-                    level,
-                    id,
-                    classes,
-                    attrs,
-                }
-            }
-            pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
-            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
-                pulldown_cmark::CodeBlockKind::Indented => {
-                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
-                }
-                pulldown_cmark::CodeBlockKind::Fenced(info) => {
-                    let info = info.trim();
-                    MarkdownTag::CodeBlock(if info.is_empty() {
-                        CodeBlockKind::Fenced
-                    } else if info.contains('/') {
-                        // Languages should never contain a slash, and PathRanges always should.
-                        // (Models are told to specify them relative to a workspace root.)
-                        CodeBlockKind::FencedSrc(PathWithRange::new(info))
-                    } else {
-                        CodeBlockKind::FencedLang(SharedString::from(info.to_string()))
-                    })
-                }
-            },
-            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
-            pulldown_cmark::Tag::Item => MarkdownTag::Item,
-            pulldown_cmark::Tag::FootnoteDefinition(label) => {
-                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
-            }
-            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
-            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
-            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
-            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
-            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
-            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
-            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
-            pulldown_cmark::Tag::Link {
-                link_type,
-                dest_url,
-                title,
-                id,
-            } => MarkdownTag::Link {
-                link_type,
-                dest_url: SharedString::from(dest_url.into_string()),
-                title: SharedString::from(title.into_string()),
-                id: SharedString::from(id.into_string()),
-            },
-            pulldown_cmark::Tag::Image {
-                link_type,
-                dest_url,
-                title,
-                id,
-            } => MarkdownTag::Image {
-                link_type,
-                dest_url: SharedString::from(dest_url.into_string()),
-                title: SharedString::from(title.into_string()),
-                id: SharedString::from(id.into_string()),
-            },
-            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
-            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
-            pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
-            pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
-            pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
+#[derive(Default, Clone, Debug, PartialEq)]
+pub struct CodeBlockMetadata {
+    pub content_range: Range<usize>,
+    pub line_count: usize,
+}
+
+pub(crate) fn extract_code_block_content_range(text: &str) -> Range<usize> {
+    let mut range = 0..text.len();
+    if text.starts_with("```") {
+        range.start += 3;
+
+        if let Some(newline_ix) = text[range.clone()].find('\n') {
+            range.start += newline_ix + 1;
         }
     }
+
+    if !range.is_empty() && text.ends_with("```") {
+        range.end -= 3;
+    }
+    range
 }
 
 /// Represents either an owned or inline string. Motivation for this is to make `SubstitutedText`
@@ -570,4 +584,41 @@ mod tests {
             )
         )
     }
+
+    #[test]
+    fn test_code_block_metadata() {
+        assert_eq!(
+            parse_markdown("```rust\nfn main() {\n let a = 1;\n}\n```"),
+            (
+                vec![
+                    (
+                        0..37,
+                        Start(CodeBlock {
+                            kind: CodeBlockKind::FencedLang("rust".into()),
+                            metadata: CodeBlockMetadata {
+                                content_range: 8..34,
+                                line_count: 3
+                            }
+                        })
+                    ),
+                    (8..34, Text),
+                    (0..37, End(MarkdownTagEnd::CodeBlock)),
+                ],
+                HashSet::from(["rust".into()]),
+                HashSet::new()
+            )
+        )
+    }
+
+    #[test]
+    fn test_extract_code_block_content_range() {
+        let input = "```rust\nlet x = 5;\n```";
+        assert_eq!(extract_code_block_content_range(input), 8..19);
+
+        let input = "plain text";
+        assert_eq!(extract_code_block_content_range(input), 0..10);
+
+        let input = "```python\nprint('hello')\nprint('world')\n```";
+        assert_eq!(extract_code_block_content_range(input), 10..40);
+    }
 }