repl: Add ability to evaluate Markdown code blocks (#15100)

Thorsten Ball , Nathan , and Antonio created

This adds the ability to evaluate TypeScript and Python code blocks in
Markdown files.

cc @rgbkrk 

Demo:


https://github.com/user-attachments/assets/55352de5-68f3-4aef-920a-78ca205651ba



Release Notes:

- N/A

---------

Co-authored-by: Nathan <nathan@zed.dev>
Co-authored-by: Antonio <antonio@zed.dev>

Change summary

Cargo.lock                     |   4 
crates/language/src/buffer.rs  |  37 +++++
crates/repl/Cargo.toml         |   4 
crates/repl/src/repl_editor.rs | 232 +++++++++++++++++++++++++++++++----
4 files changed, 247 insertions(+), 30 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -8678,6 +8678,7 @@ dependencies = [
  "image",
  "indoc",
  "language",
+ "languages",
  "log",
  "multi_buffer",
  "project",
@@ -8689,6 +8690,9 @@ dependencies = [
  "smol",
  "terminal_view",
  "theme",
+ "tree-sitter-md",
+ "tree-sitter-python",
+ "tree-sitter-typescript",
  "ui",
  "util",
  "uuid",

crates/language/src/buffer.rs 🔗

@@ -3058,6 +3058,43 @@ impl BufferSnapshot {
         })
     }
 
+    pub fn injections_intersecting_range<T: ToOffset>(
+        &self,
+        range: Range<T>,
+    ) -> impl Iterator<Item = (Range<usize>, &Arc<Language>)> + '_ {
+        let offset_range = range.start.to_offset(self)..range.end.to_offset(self);
+
+        let mut syntax_matches = self.syntax.matches(offset_range, self, |grammar| {
+            grammar
+                .injection_config
+                .as_ref()
+                .map(|config| &config.query)
+        });
+
+        let configs = syntax_matches
+            .grammars()
+            .iter()
+            .map(|grammar| grammar.injection_config.as_ref())
+            .collect::<Vec<_>>();
+
+        iter::from_fn(move || {
+            let ranges = syntax_matches.peek().and_then(|mat| {
+                let config = &configs[mat.grammar_index]?;
+                let content_capture_range = mat.captures.iter().find_map(|capture| {
+                    if capture.index == config.content_capture_ix {
+                        Some(capture.node.byte_range())
+                    } else {
+                        None
+                    }
+                })?;
+                let language = self.language_at(content_capture_range.start)?;
+                Some((content_capture_range, language))
+            });
+            syntax_matches.advance();
+            ranges
+        })
+    }
+
     pub fn runnable_ranges(
         &self,
         range: Range<Anchor>,

crates/repl/Cargo.toml 🔗

@@ -47,7 +47,11 @@ gpui = { workspace = true, features = ["test-support"] }
 http_client = { workspace = true, features = ["test-support"] }
 indoc.workspace = true
 language = { workspace = true, features = ["test-support"] }
+languages = { workspace = true, features = ["test-support"] }
 project = { workspace = true, features = ["test-support"] }
 settings = { workspace = true, features = ["test-support"] }
 theme = { workspace = true, features = ["test-support"] }
+tree-sitter-md.workspace = true
+tree-sitter-typescript.workspace = true
+tree-sitter-python.workspace = true
 util = { workspace = true, features = ["test-support"] }

crates/repl/src/repl_editor.rs 🔗

@@ -27,10 +27,11 @@ pub fn run(editor: WeakView<Editor>, cx: &mut WindowContext) -> Result<()> {
         return Ok(());
     };
 
-    let (ranges, next_cell_point) = snippet_ranges(&buffer.read(cx).snapshot(), selected_range);
+    let (runnable_ranges, next_cell_point) =
+        runnable_ranges(&buffer.read(cx).snapshot(), selected_range);
 
-    for range in ranges {
-        let Some(language) = multibuffer.read(cx).language_at(range.start, cx) else {
+    for runnable_range in runnable_ranges {
+        let Some(language) = multibuffer.read(cx).language_at(runnable_range.start, cx) else {
             continue;
         };
 
@@ -76,8 +77,11 @@ pub fn run(editor: WeakView<Editor>, cx: &mut WindowContext) -> Result<()> {
         let next_cursor;
         {
             let snapshot = multibuffer.read(cx).read(cx);
-            selected_text = snapshot.text_for_range(range.clone()).collect::<String>();
-            anchor_range = snapshot.anchor_before(range.start)..snapshot.anchor_after(range.end);
+            selected_text = snapshot
+                .text_for_range(runnable_range.clone())
+                .collect::<String>();
+            anchor_range = snapshot.anchor_before(runnable_range.start)
+                ..snapshot.anchor_after(runnable_range.end);
             next_cursor = next_cell_point.map(|point| snapshot.anchor_after(point));
         }
 
@@ -111,10 +115,13 @@ pub fn session(editor: WeakView<Editor>, cx: &mut AppContext) -> SessionSupport
 
     match kernelspec {
         Some(kernelspec) => SessionSupport::Inactive(Box::new(kernelspec)),
-        None => match language.name().as_ref() {
-            "TypeScript" | "Python" => SessionSupport::RequiresSetup(language.name()),
-            _ => SessionSupport::Unsupported,
-        },
+        None => {
+            if language_supported(&language) {
+                SessionSupport::RequiresSetup(language.name())
+            } else {
+                SessionSupport::Unsupported
+            }
+        }
     }
 }
 
@@ -156,7 +163,7 @@ pub fn shutdown(editor: WeakView<Editor>, cx: &mut WindowContext) {
     });
 }
 
-fn snippet_range(buffer: &BufferSnapshot, start_row: u32, end_row: u32) -> Range<Point> {
+fn cell_range(buffer: &BufferSnapshot, start_row: u32, end_row: u32) -> Range<Point> {
     let mut snippet_end_row = end_row;
     while buffer.is_line_blank(snippet_end_row) && snippet_end_row > start_row {
         snippet_end_row -= 1;
@@ -164,8 +171,8 @@ fn snippet_range(buffer: &BufferSnapshot, start_row: u32, end_row: u32) -> Range
     Point::new(start_row, 0)..Point::new(snippet_end_row, buffer.line_len(snippet_end_row))
 }
 
-// Returns the ranges of the snippets in the buffer and the next range for moving the cursor to
-fn jupytext_snippets(
+// Returns the ranges of the snippets in the buffer and the next point for moving the cursor to
+fn jupytext_cells(
     buffer: &BufferSnapshot,
     range: Range<Point>,
 ) -> (Vec<Range<Point>>, Option<Point>) {
@@ -208,19 +215,19 @@ fn jupytext_snippets(
                 .iter()
                 .any(|prefix| buffer.contains_str_at(Point::new(current_row, 0), prefix))
             {
-                snippets.push(snippet_range(buffer, snippet_start_row, current_row - 1));
+                snippets.push(cell_range(buffer, snippet_start_row, current_row - 1));
 
                 if current_row <= range.end.row {
                     snippet_start_row = current_row;
                 } else {
-                    // Return our snippets as well as the next range for moving the cursor to
+                    // Return our snippets as well as the next point for moving the cursor to
                     return (snippets, Some(Point::new(current_row, 0)));
                 }
             }
         }
 
         // Go to the end of the buffer (no more jupytext cells found)
-        snippets.push(snippet_range(
+        snippets.push(cell_range(
             buffer,
             snippet_start_row,
             buffer.max_point().row,
@@ -230,26 +237,52 @@ fn jupytext_snippets(
     (snippets, None)
 }
 
-fn snippet_ranges(
+fn runnable_ranges(
     buffer: &BufferSnapshot,
     range: Range<Point>,
 ) -> (Vec<Range<Point>>, Option<Point>) {
-    let (jupytext_snippets, next_cursor) = jupytext_snippets(buffer, range.clone());
+    if let Some(language) = buffer.language() {
+        if language.name().as_ref() == "Markdown" {
+            return (markdown_code_blocks(buffer, range.clone()), None);
+        }
+    }
+
+    let (jupytext_snippets, next_cursor) = jupytext_cells(buffer, range.clone());
     if !jupytext_snippets.is_empty() {
         return (jupytext_snippets, next_cursor);
     }
 
-    let snippet_range = snippet_range(buffer, range.start.row, range.end.row);
+    let snippet_range = cell_range(buffer, range.start.row, range.end.row);
     let start_language = buffer.language_at(snippet_range.start);
     let end_language = buffer.language_at(snippet_range.end);
 
-    if let Some((start, end)) = start_language.zip(end_language) {
-        if start == end {
-            return (vec![snippet_range], None);
-        }
+    if start_language
+        .zip(end_language)
+        .map_or(false, |(start, end)| start == end)
+    {
+        (vec![snippet_range], None)
+    } else {
+        (Vec::new(), None)
     }
+}
 
-    (Vec::new(), None)
+// We allow markdown code blocks to end in a trailing newline in order to render the output
+// below the final code fence. This is different than our behavior for selections and Jupytext cells.
+fn markdown_code_blocks(buffer: &BufferSnapshot, range: Range<Point>) -> Vec<Range<Point>> {
+    buffer
+        .injections_intersecting_range(range)
+        .filter(|(_, language)| language_supported(language))
+        .map(|(content_range, _)| {
+            buffer.offset_to_point(content_range.start)..buffer.offset_to_point(content_range.end)
+        })
+        .collect()
+}
+
+fn language_supported(language: &Arc<Language>) -> bool {
+    match language.name().as_ref() {
+        "TypeScript" | "Python" => true,
+        _ => false,
+    }
 }
 
 fn get_language(editor: WeakView<Editor>, cx: &mut AppContext) -> Option<Arc<Language>> {
@@ -262,9 +295,9 @@ fn get_language(editor: WeakView<Editor>, cx: &mut AppContext) -> Option<Arc<Lan
 #[cfg(test)]
 mod tests {
     use super::*;
-    use gpui::Context;
+    use gpui::{Context, Task};
     use indoc::indoc;
-    use language::{Buffer, Language, LanguageConfig};
+    use language::{Buffer, Language, LanguageConfig, LanguageRegistry};
 
     #[gpui::test]
     fn test_snippet_ranges(cx: &mut AppContext) {
@@ -295,7 +328,7 @@ mod tests {
         let snapshot = buffer.read(cx).snapshot();
 
         // Single-point selection
-        let (snippets, _) = snippet_ranges(&snapshot, Point::new(0, 4)..Point::new(0, 4));
+        let (snippets, _) = runnable_ranges(&snapshot, Point::new(0, 4)..Point::new(0, 4));
         let snippets = snippets
             .into_iter()
             .map(|range| snapshot.text_for_range(range).collect::<String>())
@@ -303,7 +336,7 @@ mod tests {
         assert_eq!(snippets, vec!["print(1 + 1)"]);
 
         // Multi-line selection
-        let (snippets, _) = snippet_ranges(&snapshot, Point::new(0, 5)..Point::new(2, 0));
+        let (snippets, _) = runnable_ranges(&snapshot, Point::new(0, 5)..Point::new(2, 0));
         let snippets = snippets
             .into_iter()
             .map(|range| snapshot.text_for_range(range).collect::<String>())
@@ -316,7 +349,7 @@ mod tests {
         );
 
         // Trimming multiple trailing blank lines
-        let (snippets, _) = snippet_ranges(&snapshot, Point::new(0, 5)..Point::new(5, 0));
+        let (snippets, _) = runnable_ranges(&snapshot, Point::new(0, 5)..Point::new(5, 0));
 
         let snippets = snippets
             .into_iter()
@@ -369,7 +402,7 @@ mod tests {
         let snapshot = buffer.read(cx).snapshot();
 
         // Jupytext snippet surrounding an empty selection
-        let (snippets, _) = snippet_ranges(&snapshot, Point::new(2, 5)..Point::new(2, 5));
+        let (snippets, _) = runnable_ranges(&snapshot, Point::new(2, 5)..Point::new(2, 5));
 
         let snippets = snippets
             .into_iter()
@@ -385,7 +418,7 @@ mod tests {
         );
 
         // Jupytext snippets intersecting a non-empty selection
-        let (snippets, _) = snippet_ranges(&snapshot, Point::new(2, 5)..Point::new(6, 2));
+        let (snippets, _) = runnable_ranges(&snapshot, Point::new(2, 5)..Point::new(6, 2));
         let snippets = snippets
             .into_iter()
             .map(|range| snapshot.text_for_range(range).collect::<String>())
@@ -409,4 +442,143 @@ mod tests {
             ]
         );
     }
+
+    #[gpui::test]
+    fn test_markdown_code_blocks(cx: &mut AppContext) {
+        let markdown = languages::language("markdown", tree_sitter_md::language());
+        let typescript =
+            languages::language("typescript", tree_sitter_typescript::language_typescript());
+        let python = languages::language("python", tree_sitter_python::language());
+        let language_registry = Arc::new(LanguageRegistry::new(
+            Task::ready(()),
+            cx.background_executor().clone(),
+        ));
+        language_registry.add(markdown.clone());
+        language_registry.add(typescript.clone());
+        language_registry.add(python.clone());
+
+        // Two code blocks intersecting with selection
+        let buffer = cx.new_model(|cx| {
+            let mut buffer = Buffer::local(
+                indoc! { r#"
+                    Hey this is Markdown!
+
+                    ```typescript
+                    let foo = 999;
+                    console.log(foo + 1999);
+                    ```
+
+                    ```typescript
+                    console.log("foo")
+                    ```
+                    "#
+                },
+                cx,
+            );
+            buffer.set_language_registry(language_registry.clone());
+            buffer.set_language(Some(markdown.clone()), cx);
+            buffer
+        });
+        let snapshot = buffer.read(cx).snapshot();
+
+        let (snippets, _) = runnable_ranges(&snapshot, Point::new(3, 5)..Point::new(8, 5));
+        let snippets = snippets
+            .into_iter()
+            .map(|range| snapshot.text_for_range(range).collect::<String>())
+            .collect::<Vec<_>>();
+
+        assert_eq!(
+            snippets,
+            vec![
+                indoc! { r#"
+                    let foo = 999;
+                    console.log(foo + 1999);
+                    "#
+                },
+                "console.log(\"foo\")\n"
+            ]
+        );
+
+        // Three code blocks intersecting with selection
+        let buffer = cx.new_model(|cx| {
+            let mut buffer = Buffer::local(
+                indoc! { r#"
+                    Hey this is Markdown!
+
+                    ```typescript
+                    let foo = 999;
+                    console.log(foo + 1999);
+                    ```
+
+                    ```ts
+                    console.log("foo")
+                    ```
+
+                    ```typescript
+                    console.log("another code block")
+                    ```
+                "# },
+                cx,
+            );
+            buffer.set_language_registry(language_registry.clone());
+            buffer.set_language(Some(markdown.clone()), cx);
+            buffer
+        });
+        let snapshot = buffer.read(cx).snapshot();
+
+        let (snippets, _) = runnable_ranges(&snapshot, Point::new(3, 5)..Point::new(12, 5));
+        let snippets = snippets
+            .into_iter()
+            .map(|range| snapshot.text_for_range(range).collect::<String>())
+            .collect::<Vec<_>>();
+
+        assert_eq!(
+            snippets,
+            vec![
+                indoc! { r#"
+                    let foo = 999;
+                    console.log(foo + 1999);
+                    "#
+                },
+                "console.log(\"foo\")\n",
+                "console.log(\"another code block\")\n",
+            ]
+        );
+
+        // Python code block
+        let buffer = cx.new_model(|cx| {
+            let mut buffer = Buffer::local(
+                indoc! { r#"
+                    Hey this is Markdown!
+
+                    ```python
+                    print("hello there")
+                    print("hello there")
+                    print("hello there")
+                    ```
+                "# },
+                cx,
+            );
+            buffer.set_language_registry(language_registry.clone());
+            buffer.set_language(Some(markdown.clone()), cx);
+            buffer
+        });
+        let snapshot = buffer.read(cx).snapshot();
+
+        let (snippets, _) = runnable_ranges(&snapshot, Point::new(4, 5)..Point::new(5, 5));
+        let snippets = snippets
+            .into_iter()
+            .map(|range| snapshot.text_for_range(range).collect::<String>())
+            .collect::<Vec<_>>();
+
+        assert_eq!(
+            snippets,
+            vec![indoc! { r#"
+                print("hello there")
+                print("hello there")
+                print("hello there")
+                "#
+            },]
+        );
+    }
 }