Paginate regex and path search tools (#26997)

Richard Feldman created

<img width="630" alt="Screenshot 2025-03-18 at 10 50 17 AM"
src="https://github.com/user-attachments/assets/0aee5367-402a-405a-8676-f2f8af425b1e"
/>

Release Notes:

- N/A

Change summary

crates/assistant_tools/src/path_search_tool.rs              | 33 ++
crates/assistant_tools/src/path_search_tool/description.md  |  4 
crates/assistant_tools/src/read_file_tool.rs                | 16 
crates/assistant_tools/src/regex_search.rs                  | 72 +++++-
crates/assistant_tools/src/regex_search_tool/description.md |  2 
5 files changed, 101 insertions(+), 26 deletions(-)

Detailed changes

crates/assistant_tools/src/path_search_tool.rs 🔗

@@ -23,8 +23,15 @@ pub struct PathSearchToolInput {
     /// You can get back the first two paths by providing a glob of "*thing*.txt"
     /// </example>
     pub glob: String,
+
+    /// Optional starting position for paginated results (0-based).
+    /// When not provided, starts from the beginning.
+    #[serde(default)]
+    pub offset: Option<usize>,
 }
 
+const RESULTS_PER_PAGE: usize = 50;
+
 pub struct PathSearchTool;
 
 impl Tool for PathSearchTool {
@@ -49,8 +56,8 @@ impl Tool for PathSearchTool {
         _action_log: Entity<ActionLog>,
         cx: &mut App,
     ) -> Task<Result<String>> {
-        let glob = match serde_json::from_value::<PathSearchToolInput>(input) {
-            Ok(input) => input.glob,
+        let (offset, glob) = match serde_json::from_value::<PathSearchToolInput>(input) {
+            Ok(input) => (input.offset.unwrap_or(0), input.glob),
             Err(err) => return Task::ready(Err(anyhow!(err))),
         };
         let path_matcher = match PathMatcher::new(&[glob.clone()]) {
@@ -87,7 +94,27 @@ impl Tool for PathSearchTool {
             } else {
                 // Sort to group entries in the same directory together.
                 matches.sort();
-                Ok(matches.join("\n"))
+
+                let total_matches = matches.len();
+                let response = if total_matches > offset + RESULTS_PER_PAGE {
+                  let paginated_matches: Vec<_> = matches
+                      .into_iter()
+                      .skip(offset)
+                      .take(RESULTS_PER_PAGE)
+                      .collect();
+
+                    format!(
+                        "Found {} total matches. Showing results {}-{} (provide 'offset' parameter for more results):\n\n{}",
+                        total_matches,
+                        offset + 1,
+                        offset + paginated_matches.len(),
+                        paginated_matches.join("\n")
+                    )
+                } else {
+                    matches.join("\n")
+                };
+
+                Ok(response)
             }
         })
     }

crates/assistant_tools/src/path_search_tool/description.md 🔗

@@ -1 +1,3 @@
-Returns all the paths in the project which match the given glob.
+Returns paths in the project which match the given glob.
+
+Results are paginated with 50 matches per page. Use the optional 'offset' parameter (the 0-based index of the first match to return, e.g. 50 for the second page) to request subsequent pages.

crates/assistant_tools/src/read_file_tool.rs 🔗

@@ -28,13 +28,13 @@ pub struct ReadFileToolInput {
     /// </example>
     pub path: Arc<Path>,
 
-    /// Optional line number to start reading from (0-based index)
+    /// Optional line number to start reading on (1-based index)
     #[serde(default)]
     pub start_line: Option<usize>,
 
-    /// Optional number of lines to read
+    /// Optional line number to end reading on (1-based index)
     #[serde(default)]
-    pub line_count: Option<usize>,
+    pub end_line: Option<usize>,
 }
 
 pub struct ReadFileTool;
@@ -83,10 +83,12 @@ impl Tool for ReadFileTool {
                     .map_or(false, |file| file.disk_state().exists())
                 {
                     let text = buffer.text();
-                    let string = if input.start_line.is_some() || input.line_count.is_some() {
-                        let lines = text.split('\n').skip(input.start_line.unwrap_or(0));
-                        if let Some(line_count) = input.line_count {
-                            Itertools::intersperse(lines.take(line_count), "\n").collect()
+                    let string = if input.start_line.is_some() || input.end_line.is_some() {
+                        let start = input.start_line.unwrap_or(1);
+                        let lines = text.split('\n').skip(start - 1);
+                        if let Some(end) = input.end_line {
+                            let count = end.saturating_sub(start);
+                            Itertools::intersperse(lines.take(count), "\n").collect()
                         } else {
                             Itertools::intersperse(lines, "\n").collect()
                         }

crates/assistant_tools/src/regex_search.rs 🔗

@@ -4,7 +4,10 @@ use futures::StreamExt;
 use gpui::{App, Entity, Task};
 use language::OffsetRangeExt;
 use language_model::LanguageModelRequestMessage;
-use project::{search::SearchQuery, Project};
+use project::{
+    search::{SearchQuery, SearchResult},
+    Project,
+};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use std::{cmp, fmt::Write, sync::Arc};
@@ -15,8 +18,15 @@ pub struct RegexSearchToolInput {
     /// A regex pattern to search for in the entire project. Note that the regex
     /// will be parsed by the Rust `regex` crate.
     pub regex: String,
+
+    /// Optional starting position for paginated results (0-based).
+    /// When not provided, starts from the beginning.
+    #[serde(default)]
+    pub offset: Option<usize>,
 }
 
+const RESULTS_PER_PAGE: usize = 20;
+
 pub struct RegexSearchTool;
 
 impl Tool for RegexSearchTool {
@@ -43,13 +53,13 @@ impl Tool for RegexSearchTool {
     ) -> Task<Result<String>> {
         const CONTEXT_LINES: u32 = 2;
 
-        let input = match serde_json::from_value::<RegexSearchToolInput>(input) {
-            Ok(input) => input,
+        let (offset, regex) = match serde_json::from_value::<RegexSearchToolInput>(input) {
+            Ok(input) => (input.offset.unwrap_or(0), input.regex),
             Err(err) => return Task::ready(Err(anyhow!(err))),
         };
 
         let query = match SearchQuery::regex(
-            &input.regex,
+            &regex,
             false,
             false,
             false,
@@ -62,20 +72,23 @@ impl Tool for RegexSearchTool {
         };
 
         let results = project.update(cx, |project, cx| project.search(query, cx));
+
         cx.spawn(|cx| async move {
             futures::pin_mut!(results);
 
             let mut output = String::new();
-            while let Some(project::search::SearchResult::Buffer { buffer, ranges }) =
-                results.next().await
-            {
+            let mut skips_remaining = offset;
+            let mut matches_found = 0;
+            let mut has_more_matches = false;
+
+            while let Some(SearchResult::Buffer { buffer, ranges }) = results.next().await {
                 if ranges.is_empty() {
                     continue;
                 }
 
-                buffer.read_with(&cx, |buffer, cx| {
+                buffer.read_with(&cx, |buffer, cx| -> Result<(), anyhow::Error> {
                     if let Some(path) = buffer.file().map(|file| file.full_path(cx)) {
-                        writeln!(output, "### Found matches in {}:\n", path.display()).unwrap();
+                        let mut file_header_written = false;
                         let mut ranges = ranges
                             .into_iter()
                             .map(|range| {
@@ -93,6 +106,17 @@ impl Tool for RegexSearchTool {
                             .peekable();
 
                         while let Some(mut range) = ranges.next() {
+                            if skips_remaining > 0 {
+                                skips_remaining -= 1;
+                                continue;
+                            }
+
+                            // We'd already found a full page of matches, and we just found one more.
+                            if matches_found >= RESULTS_PER_PAGE {
+                                has_more_matches = true;
+                                return Ok(());
+                            }
+
                             while let Some(next_range) = ranges.peek() {
                                 if range.end.row >= next_range.start.row {
                                     range.end = next_range.end;
@@ -102,18 +126,36 @@ impl Tool for RegexSearchTool {
                                 }
                             }
 
-                            writeln!(output, "```").unwrap();
+                            if !file_header_written {
+                                writeln!(output, "\n## Matches in {}", path.display())?;
+                                file_header_written = true;
+                            }
+
+                            let start_line = range.start.row + 1;
+                            let end_line = range.end.row + 1;
+                            writeln!(output, "\n### Lines {start_line}-{end_line}\n```")?;
                             output.extend(buffer.text_for_range(range));
-                            writeln!(output, "\n```\n").unwrap();
+                            output.push_str("\n```\n");
+
+                            matches_found += 1;
                         }
                     }
-                })?;
+
+                    Ok(())
+                })??;
             }
 
-            if output.is_empty() {
+            if matches_found == 0 {
                 Ok("No matches found".to_string())
-            } else {
-                Ok(output)
+            } else if has_more_matches {
+                Ok(format!(
+                    "Showing matches {}-{} (there were more matches found; use offset: {} to see next page):\n{output}",
+                    offset + 1,
+                    offset + matches_found,
+                    offset + RESULTS_PER_PAGE,
+                ))
+          } else {
+                Ok(format!("Found {matches_found} matches:\n{output}"))
             }
         })
     }

crates/assistant_tools/src/regex_search_tool/description.md 🔗

@@ -1,3 +1,5 @@
 Searches the entire project for the given regular expression.
 
 Returns a list of paths that matched the query. For each path, it returns a list of excerpts of the matched text.
+
+Results are paginated with 20 matches per page. Use the optional 'offset' parameter (the 0-based index of the first match to return, e.g. 20 for the second page) to request subsequent pages.