Support Jupytext-style line comments for REPL evaluation ranges (#15073)

Nathan Sobo, Antonio Scandurra, Antonio, Thorsten, and Thorsten Ball created

This adds support for detecting line comments in the
[Jupytext](https://jupytext.readthedocs.io/) format. When line comments
such as `# %%` are present, invoking `repl: run` will evaluate the code
between these line comments as a unit.

/cc @rgbkrk 

```py
# %%
# This is my first block
print(1)
print(2)

# %%
# This is my second block
print(3)
```

Release Notes:

- N/A

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Antonio <antonio@zed.dev>
Co-authored-by: Thorsten <thorsten@zed.dev>
Co-authored-by: Thorsten Ball <mrnugget@gmail.com>

Change summary

Cargo.lock                     |   1 
Cargo.toml                     |   1 
crates/editor/Cargo.toml       |   2 
crates/project/Cargo.toml      |   2 
crates/repl/Cargo.toml         |   1 
crates/repl/src/repl_editor.rs | 354 ++++++++++++++++++++++++++++-------
crates/repl/src/session.rs     |   9 
7 files changed, 292 insertions(+), 78 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -8665,6 +8665,7 @@ dependencies = [
  "gpui",
  "http_client",
  "image",
+ "indoc",
  "language",
  "log",
  "multi_buffer",

Cargo.toml 🔗

@@ -161,6 +161,7 @@ resolver = "2"
 
 [workspace.dependencies]
 activity_indicator = { path = "crates/activity_indicator" }
+aho-corasick = "1.1"
 ai = { path = "crates/ai" }
 anthropic = { path = "crates/anthropic" }
 assets = { path = "crates/assets" }

crates/editor/Cargo.toml 🔗

@@ -28,7 +28,7 @@ test-support = [
 ]
 
 [dependencies]
-aho-corasick = "1.1"
+aho-corasick.workspace = true
 anyhow.workspace = true
 assets.workspace = true
 chrono.workspace = true

crates/project/Cargo.toml 🔗

@@ -24,7 +24,7 @@ test-support = [
 ]
 
 [dependencies]
-aho-corasick = "1.1"
+aho-corasick.workspace = true
 anyhow.workspace = true
 async-trait.workspace = true
 client.workspace = true

crates/repl/Cargo.toml 🔗

@@ -45,6 +45,7 @@ editor = { workspace = true, features = ["test-support"] }
 env_logger.workspace = true
 gpui = { workspace = true, features = ["test-support"] }
 http_client = { workspace = true, features = ["test-support"] }
+indoc.workspace = true
 language = { workspace = true, features = ["test-support"] }
 project = { workspace = true, features = ["test-support"] }
 settings = { workspace = true, features = ["test-support"] }

crates/repl/src/repl_editor.rs 🔗

@@ -4,10 +4,9 @@ use std::ops::Range;
 use std::sync::Arc;
 
 use anyhow::{Context, Result};
-use editor::{Anchor, Editor, RangeToAnchorExt};
-use gpui::{prelude::*, AppContext, View, WeakView, WindowContext};
-use language::{Language, Point};
-use multi_buffer::MultiBufferRow;
+use editor::Editor;
+use gpui::{prelude::*, AppContext, Entity, View, WeakView, WindowContext};
+use language::{BufferSnapshot, Language, Point};
 
 use crate::repl_store::ReplStore;
 use crate::session::SessionEvent;
@@ -19,50 +18,69 @@ pub fn run(editor: WeakView<Editor>, cx: &mut WindowContext) -> Result<()> {
         return Ok(());
     }
 
-    let Some((selected_text, language, anchor_range)) = snippet(editor.clone(), cx) else {
+    let editor = editor.upgrade().context("editor was dropped")?;
+    let selected_range = editor
+        .update(cx, |editor, cx| editor.selections.newest_adjusted(cx))
+        .range();
+    let multibuffer = editor.read(cx).buffer().clone();
+    let Some(buffer) = multibuffer.read(cx).as_singleton() else {
         return Ok(());
     };
 
-    let entity_id = editor.entity_id();
+    for range in snippet_ranges(&buffer.read(cx).snapshot(), selected_range) {
+        let Some(language) = multibuffer.read(cx).language_at(range.start, cx) else {
+            continue;
+        };
 
-    let kernel_specification = store.update(cx, |store, cx| {
-        store
-            .kernelspec(&language, cx)
-            .with_context(|| format!("No kernel found for language: {}", language.name()))
-    })?;
-
-    let fs = store.read(cx).fs().clone();
-    let session = if let Some(session) = store.read(cx).get_session(entity_id).cloned() {
-        session
-    } else {
-        let session = cx.new_view(|cx| Session::new(editor.clone(), fs, kernel_specification, cx));
-
-        editor.update(cx, |_editor, cx| {
-            cx.notify();
-
-            cx.subscribe(&session, {
-                let store = store.clone();
-                move |_this, _session, event, cx| match event {
-                    SessionEvent::Shutdown(shutdown_event) => {
-                        store.update(cx, |store, _cx| {
-                            store.remove_session(shutdown_event.entity_id());
-                        });
-                    }
-                }
-            })
-            .detach();
+        let kernel_specification = store.update(cx, |store, cx| {
+            store
+                .kernelspec(&language, cx)
+                .with_context(|| format!("No kernel found for language: {}", language.name()))
         })?;
 
-        store.update(cx, |store, _cx| {
-            store.insert_session(entity_id, session.clone());
-        });
-
-        session
-    };
+        let fs = store.read(cx).fs().clone();
+        let session = if let Some(session) = store.read(cx).get_session(editor.entity_id()).cloned()
+        {
+            session
+        } else {
+            let weak_editor = editor.downgrade();
+            let session = cx.new_view(|cx| Session::new(weak_editor, fs, kernel_specification, cx));
+
+            editor.update(cx, |_editor, cx| {
+                cx.notify();
+
+                cx.subscribe(&session, {
+                    let store = store.clone();
+                    move |_this, _session, event, cx| match event {
+                        SessionEvent::Shutdown(shutdown_event) => {
+                            store.update(cx, |store, _cx| {
+                                store.remove_session(shutdown_event.entity_id());
+                            });
+                        }
+                    }
+                })
+                .detach();
+            });
+
+            store.update(cx, |store, _cx| {
+                store.insert_session(editor.entity_id(), session.clone());
+            });
+
+            session
+        };
+
+        let selected_text;
+        let anchor_range;
+        {
+            let snapshot = multibuffer.read(cx).read(cx);
+            selected_text = snapshot.text_for_range(range.clone()).collect::<String>();
+            anchor_range = snapshot.anchor_before(range.start)..snapshot.anchor_after(range.end);
+        }
 
-    session.update(cx, |session, cx| {
-        session.execute(&selected_text, anchor_range, cx);
-    });
+        session.update(cx, |session, cx| {
+            session.execute(selected_text, anchor_range, cx);
+        });
+    }
 
     anyhow::Ok(())
 }
@@ -134,48 +152,91 @@ pub fn shutdown(editor: WeakView<Editor>, cx: &mut WindowContext) {
     });
 }
 
-fn snippet(
-    editor: WeakView<Editor>,
-    cx: &mut WindowContext,
-) -> Option<(String, Arc<Language>, Range<Anchor>)> {
-    let selection = editor
-        .update(cx, |editor, cx| editor.selections.newest_adjusted(cx))
-        .ok()?;
+fn snippet_range(buffer: &BufferSnapshot, start_row: u32, end_row: u32) -> Range<Point> {
+    let mut snippet_end_row = end_row;
+    while buffer.is_line_blank(snippet_end_row) && snippet_end_row > start_row {
+        snippet_end_row -= 1;
+    }
+    Point::new(start_row, 0)..Point::new(snippet_end_row, buffer.line_len(snippet_end_row))
+}
 
-    let editor = editor.upgrade()?;
-    let editor = editor.read(cx);
+fn jupytext_snippets(buffer: &BufferSnapshot, range: Range<Point>) -> Vec<Range<Point>> {
+    let mut current_row = range.start.row;
 
-    let buffer = editor.buffer().read(cx).snapshot(cx);
-    let multi_buffer_snapshot = editor.buffer().read(cx).snapshot(cx);
+    let Some(language) = buffer.language() else {
+        return Vec::new();
+    };
 
-    let range = if selection.is_empty() {
-        Point::new(selection.start.row, 0)
-            ..Point::new(
-                selection.start.row,
-                multi_buffer_snapshot.line_len(MultiBufferRow(selection.start.row)),
-            )
-    } else {
-        let mut range = selection.range();
-        if range.end.column == 0 {
-            range.end.row -= 1;
-            range.end.column = multi_buffer_snapshot.line_len(MultiBufferRow(range.end.row));
+    let default_scope = language.default_scope();
+    let comment_prefixes = default_scope.line_comment_prefixes();
+    if comment_prefixes.is_empty() {
+        return Vec::new();
+    }
+
+    let jupytext_prefixes = comment_prefixes
+        .iter()
+        .map(|comment_prefix| format!("{comment_prefix}%%"))
+        .collect::<Vec<_>>();
+
+    let mut snippet_start_row = None;
+    loop {
+        if jupytext_prefixes
+            .iter()
+            .any(|prefix| buffer.contains_str_at(Point::new(current_row, 0), prefix))
+        {
+            snippet_start_row = Some(current_row);
+            break;
+        } else if current_row > 0 {
+            current_row -= 1;
+        } else {
+            break;
         }
-        range
-    };
+    }
 
-    let anchor_range = range.to_anchors(&multi_buffer_snapshot);
+    let mut snippets = Vec::new();
+    if let Some(mut snippet_start_row) = snippet_start_row {
+        for current_row in range.start.row + 1..=buffer.max_point().row {
+            if jupytext_prefixes
+                .iter()
+                .any(|prefix| buffer.contains_str_at(Point::new(current_row, 0), prefix))
+            {
+                snippets.push(snippet_range(buffer, snippet_start_row, current_row - 1));
+
+                if current_row <= range.end.row {
+                    snippet_start_row = current_row;
+                } else {
+                    return snippets;
+                }
+            }
+        }
 
-    let selected_text = buffer
-        .text_for_range(anchor_range.clone())
-        .collect::<String>();
+        snippets.push(snippet_range(
+            buffer,
+            snippet_start_row,
+            buffer.max_point().row,
+        ));
+    }
 
-    let start_language = buffer.language_at(anchor_range.start)?;
-    let end_language = buffer.language_at(anchor_range.end)?;
-    if start_language != end_language {
-        return None;
+    snippets
+}
+
+fn snippet_ranges(buffer: &BufferSnapshot, range: Range<Point>) -> Vec<Range<Point>> {
+    let jupytext_snippets = jupytext_snippets(buffer, range.clone());
+    if !jupytext_snippets.is_empty() {
+        return jupytext_snippets;
+    }
+
+    let snippet_range = snippet_range(buffer, range.start.row, range.end.row);
+    let start_language = buffer.language_at(snippet_range.start);
+    let end_language = buffer.language_at(snippet_range.end);
+
+    if let Some((start, end)) = start_language.zip(end_language) {
+        if start == end {
+            return vec![snippet_range];
+        }
     }
 
-    Some((selected_text, start_language.clone(), anchor_range))
+    Vec::new()
 }
 
 fn get_language(editor: WeakView<Editor>, cx: &mut AppContext) -> Option<Arc<Language>> {
@@ -184,3 +245,148 @@ fn get_language(editor: WeakView<Editor>, cx: &mut AppContext) -> Option<Arc<Lan
     let buffer = editor.read(cx).buffer().read(cx).snapshot(cx);
     buffer.language_at(selection.head()).cloned()
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use gpui::Context;
+    use indoc::indoc;
+    use language::{Buffer, Language, LanguageConfig};
+
+    #[gpui::test]
+    fn test_snippet_ranges(cx: &mut AppContext) {
+        // Create a test language
+        let test_language = Arc::new(Language::new(
+            LanguageConfig {
+                name: "TestLang".into(),
+                line_comments: vec!["# ".into()],
+                ..Default::default()
+            },
+            None,
+        ));
+
+        let buffer = cx.new_model(|cx| {
+            Buffer::local(
+                indoc! { r#"
+                    print(1 + 1)
+                    print(2 + 2)
+
+                    print(4 + 4)
+
+
+                "# },
+                cx,
+            )
+            .with_language(test_language, cx)
+        });
+        let snapshot = buffer.read(cx).snapshot();
+
+        // Single-point selection
+        let snippets = snippet_ranges(&snapshot, Point::new(0, 4)..Point::new(0, 4))
+            .into_iter()
+            .map(|range| snapshot.text_for_range(range).collect::<String>())
+            .collect::<Vec<_>>();
+        assert_eq!(snippets, vec!["print(1 + 1)"]);
+
+        // Multi-line selection
+        let snippets = snippet_ranges(&snapshot, Point::new(0, 5)..Point::new(2, 0))
+            .into_iter()
+            .map(|range| snapshot.text_for_range(range).collect::<String>())
+            .collect::<Vec<_>>();
+        assert_eq!(
+            snippets,
+            vec![indoc! { r#"
+                print(1 + 1)
+                print(2 + 2)"# }]
+        );
+
+        // Trimming multiple trailing blank lines
+        let snippets = snippet_ranges(&snapshot, Point::new(0, 5)..Point::new(5, 0))
+            .into_iter()
+            .map(|range| snapshot.text_for_range(range).collect::<String>())
+            .collect::<Vec<_>>();
+        assert_eq!(
+            snippets,
+            vec![indoc! { r#"
+                print(1 + 1)
+                print(2 + 2)
+
+                print(4 + 4)"# }]
+        );
+    }
+
+    #[gpui::test]
+    fn test_jupytext_snippet_ranges(cx: &mut AppContext) {
+        // Create a test language
+        let test_language = Arc::new(Language::new(
+            LanguageConfig {
+                name: "TestLang".into(),
+                line_comments: vec!["# ".into()],
+                ..Default::default()
+            },
+            None,
+        ));
+
+        let buffer = cx.new_model(|cx| {
+            Buffer::local(
+                indoc! { r#"
+                    # Hello!
+                    # %% [markdown]
+                    # This is some arithmetic
+                    print(1 + 1)
+                    print(2 + 2)
+
+                    # %%
+                    print(3 + 3)
+                    print(4 + 4)
+
+                    print(5 + 5)
+
+
+
+                "# },
+                cx,
+            )
+            .with_language(test_language, cx)
+        });
+        let snapshot = buffer.read(cx).snapshot();
+
+        // Jupytext snippet surrounding an empty selection
+        let snippets = snippet_ranges(&snapshot, Point::new(2, 5)..Point::new(2, 5))
+            .into_iter()
+            .map(|range| snapshot.text_for_range(range).collect::<String>())
+            .collect::<Vec<_>>();
+        assert_eq!(
+            snippets,
+            vec![indoc! { r#"
+                # %% [markdown]
+                # This is some arithmetic
+                print(1 + 1)
+                print(2 + 2)"# }]
+        );
+
+        // Jupytext snippets intersecting a non-empty selection
+        let snippets = snippet_ranges(&snapshot, Point::new(2, 5)..Point::new(6, 2))
+            .into_iter()
+            .map(|range| snapshot.text_for_range(range).collect::<String>())
+            .collect::<Vec<_>>();
+        assert_eq!(
+            snippets,
+            vec![
+                indoc! { r#"
+                    # %% [markdown]
+                    # This is some arithmetic
+                    print(1 + 1)
+                    print(2 + 2)"#
+                },
+                indoc! { r#"
+                    # %%
+                    print(3 + 3)
+                    print(4 + 4)
+
+                    print(5 + 5)"#
+                }
+            ]
+        );
+    }
+}

crates/repl/src/session.rs 🔗

@@ -377,7 +377,12 @@ impl Session {
         self.blocks.clear();
     }
 
-    pub fn execute(&mut self, code: &str, anchor_range: Range<Anchor>, cx: &mut ViewContext<Self>) {
+    pub fn execute(
+        &mut self,
+        code: String,
+        anchor_range: Range<Anchor>,
+        cx: &mut ViewContext<Self>,
+    ) {
         let Some(editor) = self.editor.upgrade() else {
             return;
         };
@@ -387,7 +392,7 @@ impl Session {
         }
 
         let execute_request = ExecuteRequest {
-            code: code.to_string(),
+            code,
             ..ExecuteRequest::default()
         };