1use crate::schema::json_schema_for;
2use action_log::ActionLog;
3use anyhow::{Result, anyhow};
4use assistant_tool::{Tool, ToolResult};
5use futures::StreamExt;
6use gpui::{AnyWindowHandle, App, Entity, Task};
7use language::{OffsetRangeExt, ParseStatus, Point};
8use language_model::{LanguageModel, LanguageModelRequest, LanguageModelToolSchemaFormat};
9use project::{
10 Project, WorktreeSettings,
11 search::{SearchQuery, SearchResult},
12};
13use schemars::JsonSchema;
14use serde::{Deserialize, Serialize};
15use settings::Settings;
16use std::{cmp, fmt::Write, sync::Arc};
17use ui::IconName;
18use util::RangeExt;
19use util::markdown::MarkdownInlineCode;
20use util::paths::PathMatcher;
21
// Input accepted by the grep tool. `GrepTool::run` and `GrepTool::ui_text` deserialize the
// caller-supplied JSON into this struct, and `GrepTool::input_schema` derives the tool's
// JSON schema from it.
//
// NOTE(review): this type derives `JsonSchema`, and schemars normally publishes `///`
// comments as schema descriptions, so the field docs below are presumably text shown to
// the caller of the tool rather than internal notes — confirm before rewording them.
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
pub struct GrepToolInput {
    /// A regex pattern to search for in the entire project. Note that the regex
    /// will be parsed by the Rust `regex` crate.
    ///
    /// Do NOT specify a path here! This will only be matched against the code **content**.
    pub regex: String,

    /// A glob pattern for the paths of files to include in the search.
    /// Supports standard glob patterns like "**/*.rs" or "src/**/*.ts".
    /// If omitted, all files in the project will be searched.
    pub include_pattern: Option<String>,

    /// Optional starting position for paginated results (0-based).
    /// When not provided, starts from the beginning.
    // `#[serde(default)]` makes a missing `offset` deserialize as 0.
    #[serde(default)]
    pub offset: u32,

    /// Whether the regex is case-sensitive. Defaults to false (case-insensitive).
    // `#[serde(default)]` makes a missing `case_sensitive` deserialize as `false`.
    #[serde(default)]
    pub case_sensitive: bool,
}
44
45impl GrepToolInput {
46 /// Which page of search results this is.
47 pub fn page(&self) -> u32 {
48 1 + (self.offset / RESULTS_PER_PAGE)
49 }
50}
51
/// Maximum number of match excerpts written into a single tool response. It is also the
/// divisor `GrepToolInput::page` uses to turn an offset into a page number.
const RESULTS_PER_PAGE: u32 = 20;
53
/// Stateless tool, registered under the name `grep`, that runs a regex search over the
/// project and returns the matches as Markdown text.
pub struct GrepTool;
55
56impl Tool for GrepTool {
57 fn name(&self) -> String {
58 "grep".into()
59 }
60
61 fn needs_confirmation(&self, _: &serde_json::Value, _: &Entity<Project>, _: &App) -> bool {
62 false
63 }
64
65 fn may_perform_edits(&self) -> bool {
66 false
67 }
68
69 fn description(&self) -> String {
70 include_str!("./grep_tool/description.md").into()
71 }
72
73 fn icon(&self) -> IconName {
74 IconName::ToolRegex
75 }
76
77 fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result<serde_json::Value> {
78 json_schema_for::<GrepToolInput>(format)
79 }
80
81 fn ui_text(&self, input: &serde_json::Value) -> String {
82 match serde_json::from_value::<GrepToolInput>(input.clone()) {
83 Ok(input) => {
84 let page = input.page();
85 let regex_str = MarkdownInlineCode(&input.regex);
86 let case_info = if input.case_sensitive {
87 " (case-sensitive)"
88 } else {
89 ""
90 };
91
92 if page > 1 {
93 format!("Get page {page} of search results for regex {regex_str}{case_info}")
94 } else {
95 format!("Search files for regex {regex_str}{case_info}")
96 }
97 }
98 Err(_) => "Search with regex".to_string(),
99 }
100 }
101
102 fn run(
103 self: Arc<Self>,
104 input: serde_json::Value,
105 _request: Arc<LanguageModelRequest>,
106 project: Entity<Project>,
107 _action_log: Entity<ActionLog>,
108 _model: Arc<dyn LanguageModel>,
109 _window: Option<AnyWindowHandle>,
110 cx: &mut App,
111 ) -> ToolResult {
112 const CONTEXT_LINES: u32 = 2;
113 const MAX_ANCESTOR_LINES: u32 = 10;
114
115 let input = match serde_json::from_value::<GrepToolInput>(input) {
116 Ok(input) => input,
117 Err(error) => {
118 return Task::ready(Err(anyhow!("Failed to parse input: {error}"))).into();
119 }
120 };
121
122 let include_matcher = match PathMatcher::new(
123 input
124 .include_pattern
125 .as_ref()
126 .into_iter()
127 .collect::<Vec<_>>(),
128 ) {
129 Ok(matcher) => matcher,
130 Err(error) => {
131 return Task::ready(Err(anyhow!("invalid include glob pattern: {error}"))).into();
132 }
133 };
134
135 // Exclude global file_scan_exclusions and private_files settings
136 let exclude_matcher = {
137 let global_settings = WorktreeSettings::get_global(cx);
138 let exclude_patterns = global_settings
139 .file_scan_exclusions
140 .sources()
141 .iter()
142 .chain(global_settings.private_files.sources().iter());
143
144 match PathMatcher::new(exclude_patterns) {
145 Ok(matcher) => matcher,
146 Err(error) => {
147 return Task::ready(Err(anyhow!("invalid exclude pattern: {error}"))).into();
148 }
149 }
150 };
151
152 let query = match SearchQuery::regex(
153 &input.regex,
154 false,
155 input.case_sensitive,
156 false,
157 false,
158 include_matcher,
159 exclude_matcher,
160 true, // Always match file include pattern against *full project paths* that start with a project root.
161 None,
162 ) {
163 Ok(query) => query,
164 Err(error) => return Task::ready(Err(error)).into(),
165 };
166
167 let results = project.update(cx, |project, cx| project.search(query, cx));
168
169 cx.spawn(async move |cx| {
170 futures::pin_mut!(results);
171
172 let mut output = String::new();
173 let mut skips_remaining = input.offset;
174 let mut matches_found = 0;
175 let mut has_more_matches = false;
176
177 'outer: while let Some(SearchResult::Buffer { buffer, ranges }) = results.next().await {
178 if ranges.is_empty() {
179 continue;
180 }
181
182 let Ok((Some(path), mut parse_status)) = buffer.read_with(cx, |buffer, cx| {
183 (buffer.file().map(|file| file.full_path(cx)), buffer.parse_status())
184 }) else {
185 continue;
186 };
187
188 // Check if this file should be excluded based on its worktree settings
189 if let Ok(Some(project_path)) = project.read_with(cx, |project, cx| {
190 project.find_project_path(&path, cx)
191 }) {
192 if cx.update(|cx| {
193 let worktree_settings = WorktreeSettings::get(Some((&project_path).into()), cx);
194 worktree_settings.is_path_excluded(&project_path.path)
195 || worktree_settings.is_path_private(&project_path.path)
196 }).unwrap_or(false) {
197 continue;
198 }
199 }
200
201 while *parse_status.borrow() != ParseStatus::Idle {
202 parse_status.changed().await?;
203 }
204
205 let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot())?;
206
207 let mut ranges = ranges
208 .into_iter()
209 .map(|range| {
210 let matched = range.to_point(&snapshot);
211 let matched_end_line_len = snapshot.line_len(matched.end.row);
212 let full_lines = Point::new(matched.start.row, 0)..Point::new(matched.end.row, matched_end_line_len);
213 let symbols = snapshot.symbols_containing(matched.start, None);
214
215 if let Some(ancestor_node) = snapshot.syntax_ancestor(full_lines.clone()) {
216 let full_ancestor_range = ancestor_node.byte_range().to_point(&snapshot);
217 let end_row = full_ancestor_range.end.row.min(full_ancestor_range.start.row + MAX_ANCESTOR_LINES);
218 let end_col = snapshot.line_len(end_row);
219 let capped_ancestor_range = Point::new(full_ancestor_range.start.row, 0)..Point::new(end_row, end_col);
220
221 if capped_ancestor_range.contains_inclusive(&full_lines) {
222 return (capped_ancestor_range, Some(full_ancestor_range), symbols)
223 }
224 }
225
226 let mut matched = matched;
227 matched.start.column = 0;
228 matched.start.row =
229 matched.start.row.saturating_sub(CONTEXT_LINES);
230 matched.end.row = cmp::min(
231 snapshot.max_point().row,
232 matched.end.row + CONTEXT_LINES,
233 );
234 matched.end.column = snapshot.line_len(matched.end.row);
235
236 (matched, None, symbols)
237 })
238 .peekable();
239
240 let mut file_header_written = false;
241
242 while let Some((mut range, ancestor_range, parent_symbols)) = ranges.next(){
243 if skips_remaining > 0 {
244 skips_remaining -= 1;
245 continue;
246 }
247
248 // We'd already found a full page of matches, and we just found one more.
249 if matches_found >= RESULTS_PER_PAGE {
250 has_more_matches = true;
251 break 'outer;
252 }
253
254 while let Some((next_range, _, _)) = ranges.peek() {
255 if range.end.row >= next_range.start.row {
256 range.end = next_range.end;
257 ranges.next();
258 } else {
259 break;
260 }
261 }
262
263 if !file_header_written {
264 writeln!(output, "\n## Matches in {}", path.display())?;
265 file_header_written = true;
266 }
267
268 let end_row = range.end.row;
269 output.push_str("\n### ");
270
271 if let Some(parent_symbols) = &parent_symbols {
272 for symbol in parent_symbols {
273 write!(output, "{} › ", symbol.text)?;
274 }
275 }
276
277 if range.start.row == end_row {
278 writeln!(output, "L{}", range.start.row + 1)?;
279 } else {
280 writeln!(output, "L{}-{}", range.start.row + 1, end_row + 1)?;
281 }
282
283 output.push_str("```\n");
284 output.extend(snapshot.text_for_range(range));
285 output.push_str("\n```\n");
286
287 if let Some(ancestor_range) = ancestor_range {
288 if end_row < ancestor_range.end.row {
289 let remaining_lines = ancestor_range.end.row - end_row;
290 writeln!(output, "\n{} lines remaining in ancestor node. Read the file to see all.", remaining_lines)?;
291 }
292 }
293
294 matches_found += 1;
295 }
296 }
297
298 if matches_found == 0 {
299 Ok("No matches found".to_string().into())
300 } else if has_more_matches {
301 Ok(format!(
302 "Showing matches {}-{} (there were more matches found; use offset: {} to see next page):\n{output}",
303 input.offset + 1,
304 input.offset + matches_found,
305 input.offset + RESULTS_PER_PAGE,
306 ).into())
307 } else {
308 Ok(format!("Found {matches_found} matches:\n{output}").into())
309 }
310 }).into()
311 }
312}
313
314#[cfg(test)]
315mod tests {
316 use super::*;
317 use assistant_tool::Tool;
318 use gpui::{AppContext, TestAppContext, UpdateGlobal};
319 use language::{Language, LanguageConfig, LanguageMatcher};
320 use language_model::fake_provider::FakeLanguageModel;
321 use project::{FakeFs, Project, WorktreeSettings};
322 use serde_json::json;
323 use settings::SettingsStore;
324 use unindent::Unindent;
325 use util::path;
326
327 #[gpui::test]
328 async fn test_grep_tool_with_include_pattern(cx: &mut TestAppContext) {
329 init_test(cx);
330 cx.executor().allow_parking();
331
332 let fs = FakeFs::new(cx.executor().clone());
333 fs.insert_tree(
334 path!("/root"),
335 serde_json::json!({
336 "src": {
337 "main.rs": "fn main() {\n println!(\"Hello, world!\");\n}",
338 "utils": {
339 "helper.rs": "fn helper() {\n println!(\"I'm a helper!\");\n}",
340 },
341 },
342 "tests": {
343 "test_main.rs": "fn test_main() {\n assert!(true);\n}",
344 }
345 }),
346 )
347 .await;
348
349 let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
350
351 // Test with include pattern for Rust files inside the root of the project
352 let input = serde_json::to_value(GrepToolInput {
353 regex: "println".to_string(),
354 include_pattern: Some("root/**/*.rs".to_string()),
355 offset: 0,
356 case_sensitive: false,
357 })
358 .unwrap();
359
360 let result = run_grep_tool(input, project.clone(), cx).await;
361 assert!(result.contains("main.rs"), "Should find matches in main.rs");
362 assert!(
363 result.contains("helper.rs"),
364 "Should find matches in helper.rs"
365 );
366 assert!(
367 !result.contains("test_main.rs"),
368 "Should not include test_main.rs even though it's a .rs file (because it doesn't have the pattern)"
369 );
370
371 // Test with include pattern for src directory only
372 let input = serde_json::to_value(GrepToolInput {
373 regex: "fn".to_string(),
374 include_pattern: Some("root/**/src/**".to_string()),
375 offset: 0,
376 case_sensitive: false,
377 })
378 .unwrap();
379
380 let result = run_grep_tool(input, project.clone(), cx).await;
381 assert!(
382 result.contains("main.rs"),
383 "Should find matches in src/main.rs"
384 );
385 assert!(
386 result.contains("helper.rs"),
387 "Should find matches in src/utils/helper.rs"
388 );
389 assert!(
390 !result.contains("test_main.rs"),
391 "Should not include test_main.rs as it's not in src directory"
392 );
393
394 // Test with empty include pattern (should default to all files)
395 let input = serde_json::to_value(GrepToolInput {
396 regex: "fn".to_string(),
397 include_pattern: None,
398 offset: 0,
399 case_sensitive: false,
400 })
401 .unwrap();
402
403 let result = run_grep_tool(input, project.clone(), cx).await;
404 assert!(result.contains("main.rs"), "Should find matches in main.rs");
405 assert!(
406 result.contains("helper.rs"),
407 "Should find matches in helper.rs"
408 );
409 assert!(
410 result.contains("test_main.rs"),
411 "Should include test_main.rs"
412 );
413 }
414
415 #[gpui::test]
416 async fn test_grep_tool_with_case_sensitivity(cx: &mut TestAppContext) {
417 init_test(cx);
418 cx.executor().allow_parking();
419
420 let fs = FakeFs::new(cx.executor().clone());
421 fs.insert_tree(
422 path!("/root"),
423 serde_json::json!({
424 "case_test.txt": "This file has UPPERCASE and lowercase text.\nUPPERCASE patterns should match only with case_sensitive: true",
425 }),
426 )
427 .await;
428
429 let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
430
431 // Test case-insensitive search (default)
432 let input = serde_json::to_value(GrepToolInput {
433 regex: "uppercase".to_string(),
434 include_pattern: Some("**/*.txt".to_string()),
435 offset: 0,
436 case_sensitive: false,
437 })
438 .unwrap();
439
440 let result = run_grep_tool(input, project.clone(), cx).await;
441 assert!(
442 result.contains("UPPERCASE"),
443 "Case-insensitive search should match uppercase"
444 );
445
446 // Test case-sensitive search
447 let input = serde_json::to_value(GrepToolInput {
448 regex: "uppercase".to_string(),
449 include_pattern: Some("**/*.txt".to_string()),
450 offset: 0,
451 case_sensitive: true,
452 })
453 .unwrap();
454
455 let result = run_grep_tool(input, project.clone(), cx).await;
456 assert!(
457 !result.contains("UPPERCASE"),
458 "Case-sensitive search should not match uppercase"
459 );
460
461 // Test case-sensitive search
462 let input = serde_json::to_value(GrepToolInput {
463 regex: "LOWERCASE".to_string(),
464 include_pattern: Some("**/*.txt".to_string()),
465 offset: 0,
466 case_sensitive: true,
467 })
468 .unwrap();
469
470 let result = run_grep_tool(input, project.clone(), cx).await;
471
472 assert!(
473 !result.contains("lowercase"),
474 "Case-sensitive search should match lowercase"
475 );
476
477 // Test case-sensitive search for lowercase pattern
478 let input = serde_json::to_value(GrepToolInput {
479 regex: "lowercase".to_string(),
480 include_pattern: Some("**/*.txt".to_string()),
481 offset: 0,
482 case_sensitive: true,
483 })
484 .unwrap();
485
486 let result = run_grep_tool(input, project.clone(), cx).await;
487 assert!(
488 result.contains("lowercase"),
489 "Case-sensitive search should match lowercase text"
490 );
491 }
492
/// Helper function to set up a syntax test environment
///
/// Creates a fake file system containing a single file, `/root/test_syntax.rs`, opens it
/// as a test project, registers the Rust language from `rust_lang` with the project, and
/// returns the project.
async fn setup_syntax_test(cx: &mut TestAppContext) -> Entity<Project> {
    use unindent::Unindent;
    init_test(cx);
    cx.executor().allow_parking();

    let fs = FakeFs::new(cx.executor().clone());

    // Create test file with syntax structures
    //
    // The tests built on this helper assert exact line numbers and exact excerpt text, so
    // the line order and the relative indentation of this fixture are significant.
    fs.insert_tree(
        path!("/root"),
        serde_json::json!({
            "test_syntax.rs": r#"
                fn top_level_function() {
                    println!("This is at the top level");
                }

                mod feature_module {
                    pub mod nested_module {
                        pub fn nested_function(
                            first_arg: String,
                            second_arg: i32,
                        ) {
                            println!("Function in nested module");
                            println!("{first_arg}");
                            println!("{second_arg}");
                        }
                    }
                }

                struct MyStruct {
                    field1: String,
                    field2: i32,
                }

                impl MyStruct {
                    fn method_with_block() {
                        let condition = true;
                        if condition {
                            println!("Inside if block");
                        }
                    }

                    fn long_function() {
                        println!("Line 1");
                        println!("Line 2");
                        println!("Line 3");
                        println!("Line 4");
                        println!("Line 5");
                        println!("Line 6");
                        println!("Line 7");
                        println!("Line 8");
                        println!("Line 9");
                        println!("Line 10");
                        println!("Line 11");
                        println!("Line 12");
                    }
                }

                trait Processor {
                    fn process(&self, input: &str) -> String;
                }

                impl Processor for MyStruct {
                    fn process(&self, input: &str) -> String {
                        format!("Processed: {}", input)
                    }
                }
            "#.unindent().trim(),
        }),
    )
    .await;

    let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;

    // Register the Rust language (grammar and outline query, see `rust_lang`) with the
    // project's language registry.
    project.update(cx, |project, _cx| {
        project.languages().add(rust_lang().into())
    });

    project
}
574
575 #[gpui::test]
576 async fn test_grep_top_level_function(cx: &mut TestAppContext) {
577 let project = setup_syntax_test(cx).await;
578
579 // Test: Line at the top level of the file
580 let input = serde_json::to_value(GrepToolInput {
581 regex: "This is at the top level".to_string(),
582 include_pattern: Some("**/*.rs".to_string()),
583 offset: 0,
584 case_sensitive: false,
585 })
586 .unwrap();
587
588 let result = run_grep_tool(input, project.clone(), cx).await;
589 let expected = r#"
590 Found 1 matches:
591
592 ## Matches in root/test_syntax.rs
593
594 ### fn top_level_function › L1-3
595 ```
596 fn top_level_function() {
597 println!("This is at the top level");
598 }
599 ```
600 "#
601 .unindent();
602 assert_eq!(result, expected);
603 }
604
605 #[gpui::test]
606 async fn test_grep_function_body(cx: &mut TestAppContext) {
607 let project = setup_syntax_test(cx).await;
608
609 // Test: Line inside a function body
610 let input = serde_json::to_value(GrepToolInput {
611 regex: "Function in nested module".to_string(),
612 include_pattern: Some("**/*.rs".to_string()),
613 offset: 0,
614 case_sensitive: false,
615 })
616 .unwrap();
617
618 let result = run_grep_tool(input, project.clone(), cx).await;
619 let expected = r#"
620 Found 1 matches:
621
622 ## Matches in root/test_syntax.rs
623
624 ### mod feature_module › pub mod nested_module › pub fn nested_function › L10-14
625 ```
626 ) {
627 println!("Function in nested module");
628 println!("{first_arg}");
629 println!("{second_arg}");
630 }
631 ```
632 "#
633 .unindent();
634 assert_eq!(result, expected);
635 }
636
637 #[gpui::test]
638 async fn test_grep_function_args_and_body(cx: &mut TestAppContext) {
639 let project = setup_syntax_test(cx).await;
640
641 // Test: Line with a function argument
642 let input = serde_json::to_value(GrepToolInput {
643 regex: "second_arg".to_string(),
644 include_pattern: Some("**/*.rs".to_string()),
645 offset: 0,
646 case_sensitive: false,
647 })
648 .unwrap();
649
650 let result = run_grep_tool(input, project.clone(), cx).await;
651 let expected = r#"
652 Found 1 matches:
653
654 ## Matches in root/test_syntax.rs
655
656 ### mod feature_module › pub mod nested_module › pub fn nested_function › L7-14
657 ```
658 pub fn nested_function(
659 first_arg: String,
660 second_arg: i32,
661 ) {
662 println!("Function in nested module");
663 println!("{first_arg}");
664 println!("{second_arg}");
665 }
666 ```
667 "#
668 .unindent();
669 assert_eq!(result, expected);
670 }
671
672 #[gpui::test]
673 async fn test_grep_if_block(cx: &mut TestAppContext) {
674 use unindent::Unindent;
675 let project = setup_syntax_test(cx).await;
676
677 // Test: Line inside an if block
678 let input = serde_json::to_value(GrepToolInput {
679 regex: "Inside if block".to_string(),
680 include_pattern: Some("**/*.rs".to_string()),
681 offset: 0,
682 case_sensitive: false,
683 })
684 .unwrap();
685
686 let result = run_grep_tool(input, project.clone(), cx).await;
687 let expected = r#"
688 Found 1 matches:
689
690 ## Matches in root/test_syntax.rs
691
692 ### impl MyStruct › fn method_with_block › L26-28
693 ```
694 if condition {
695 println!("Inside if block");
696 }
697 ```
698 "#
699 .unindent();
700 assert_eq!(result, expected);
701 }
702
703 #[gpui::test]
704 async fn test_grep_long_function_top(cx: &mut TestAppContext) {
705 use unindent::Unindent;
706 let project = setup_syntax_test(cx).await;
707
708 // Test: Line in the middle of a long function - should show message about remaining lines
709 let input = serde_json::to_value(GrepToolInput {
710 regex: "Line 5".to_string(),
711 include_pattern: Some("**/*.rs".to_string()),
712 offset: 0,
713 case_sensitive: false,
714 })
715 .unwrap();
716
717 let result = run_grep_tool(input, project.clone(), cx).await;
718 let expected = r#"
719 Found 1 matches:
720
721 ## Matches in root/test_syntax.rs
722
723 ### impl MyStruct › fn long_function › L31-41
724 ```
725 fn long_function() {
726 println!("Line 1");
727 println!("Line 2");
728 println!("Line 3");
729 println!("Line 4");
730 println!("Line 5");
731 println!("Line 6");
732 println!("Line 7");
733 println!("Line 8");
734 println!("Line 9");
735 println!("Line 10");
736 ```
737
738 3 lines remaining in ancestor node. Read the file to see all.
739 "#
740 .unindent();
741 assert_eq!(result, expected);
742 }
743
744 #[gpui::test]
745 async fn test_grep_long_function_bottom(cx: &mut TestAppContext) {
746 use unindent::Unindent;
747 let project = setup_syntax_test(cx).await;
748
749 // Test: Line in the long function
750 let input = serde_json::to_value(GrepToolInput {
751 regex: "Line 12".to_string(),
752 include_pattern: Some("**/*.rs".to_string()),
753 offset: 0,
754 case_sensitive: false,
755 })
756 .unwrap();
757
758 let result = run_grep_tool(input, project.clone(), cx).await;
759 let expected = r#"
760 Found 1 matches:
761
762 ## Matches in root/test_syntax.rs
763
764 ### impl MyStruct › fn long_function › L41-45
765 ```
766 println!("Line 10");
767 println!("Line 11");
768 println!("Line 12");
769 }
770 }
771 ```
772 "#
773 .unindent();
774 assert_eq!(result, expected);
775 }
776
777 async fn run_grep_tool(
778 input: serde_json::Value,
779 project: Entity<Project>,
780 cx: &mut TestAppContext,
781 ) -> String {
782 let tool = Arc::new(GrepTool);
783 let action_log = cx.new(|_cx| ActionLog::new(project.clone()));
784 let model = Arc::new(FakeLanguageModel::default());
785 let task =
786 cx.update(|cx| tool.run(input, Arc::default(), project, action_log, model, None, cx));
787
788 match task.output.await {
789 Ok(result) => {
790 if cfg!(windows) {
791 result.content.as_str().unwrap().replace("root\\", "root/")
792 } else {
793 result.content.as_str().unwrap().to_string()
794 }
795 }
796 Err(e) => panic!("Failed to run grep tool: {}", e),
797 }
798 }
799
/// Prepares the test app: installs a test `SettingsStore` as a global, then initializes
/// the `language` crate and the project settings.
fn init_test(cx: &mut TestAppContext) {
    cx.update(|cx| {
        // The store is built first and installed second because both calls need `cx`.
        let settings_store = SettingsStore::test(cx);
        cx.set_global(settings_store);
        // NOTE(review): these initializers presumably rely on the settings store installed
        // above — keep them after `set_global`.
        language::init(cx);
        Project::init_settings(cx);
    });
}
808
809 fn rust_lang() -> Language {
810 Language::new(
811 LanguageConfig {
812 name: "Rust".into(),
813 matcher: LanguageMatcher {
814 path_suffixes: vec!["rs".to_string()],
815 ..Default::default()
816 },
817 ..Default::default()
818 },
819 Some(tree_sitter_rust::LANGUAGE.into()),
820 )
821 .with_outline_query(include_str!("../../languages/src/rust/outline.scm"))
822 .unwrap()
823 }
824
825 #[gpui::test]
826 async fn test_grep_security_boundaries(cx: &mut TestAppContext) {
827 init_test(cx);
828
829 let fs = FakeFs::new(cx.executor());
830
831 fs.insert_tree(
832 path!("/"),
833 json!({
834 "project_root": {
835 "allowed_file.rs": "fn main() { println!(\"This file is in the project\"); }",
836 ".mysecrets": "SECRET_KEY=abc123\nfn secret() { /* private */ }",
837 ".secretdir": {
838 "config": "fn special_configuration() { /* excluded */ }"
839 },
840 ".mymetadata": "fn custom_metadata() { /* excluded */ }",
841 "subdir": {
842 "normal_file.rs": "fn normal_file_content() { /* Normal */ }",
843 "special.privatekey": "fn private_key_content() { /* private */ }",
844 "data.mysensitive": "fn sensitive_data() { /* private */ }"
845 }
846 },
847 "outside_project": {
848 "sensitive_file.rs": "fn outside_function() { /* This file is outside the project */ }"
849 }
850 }),
851 )
852 .await;
853
854 cx.update(|cx| {
855 use gpui::UpdateGlobal;
856 use project::WorktreeSettings;
857 use settings::SettingsStore;
858 SettingsStore::update_global(cx, |store, cx| {
859 store.update_user_settings::<WorktreeSettings>(cx, |settings| {
860 settings.file_scan_exclusions = Some(vec![
861 "**/.secretdir".to_string(),
862 "**/.mymetadata".to_string(),
863 ]);
864 settings.private_files = Some(vec![
865 "**/.mysecrets".to_string(),
866 "**/*.privatekey".to_string(),
867 "**/*.mysensitive".to_string(),
868 ]);
869 });
870 });
871 });
872
873 let project = Project::test(fs.clone(), [path!("/project_root").as_ref()], cx).await;
874 let action_log = cx.new(|_| ActionLog::new(project.clone()));
875 let model = Arc::new(FakeLanguageModel::default());
876
877 // Searching for files outside the project worktree should return no results
878 let result = cx
879 .update(|cx| {
880 let input = json!({
881 "regex": "outside_function"
882 });
883 Arc::new(GrepTool)
884 .run(
885 input,
886 Arc::default(),
887 project.clone(),
888 action_log.clone(),
889 model.clone(),
890 None,
891 cx,
892 )
893 .output
894 })
895 .await;
896 let results = result.unwrap();
897 let paths = extract_paths_from_results(&results.content.as_str().unwrap());
898 assert!(
899 paths.is_empty(),
900 "grep_tool should not find files outside the project worktree"
901 );
902
903 // Searching within the project should succeed
904 let result = cx
905 .update(|cx| {
906 let input = json!({
907 "regex": "main"
908 });
909 Arc::new(GrepTool)
910 .run(
911 input,
912 Arc::default(),
913 project.clone(),
914 action_log.clone(),
915 model.clone(),
916 None,
917 cx,
918 )
919 .output
920 })
921 .await;
922 let results = result.unwrap();
923 let paths = extract_paths_from_results(&results.content.as_str().unwrap());
924 assert!(
925 paths.iter().any(|p| p.contains("allowed_file.rs")),
926 "grep_tool should be able to search files inside worktrees"
927 );
928
929 // Searching files that match file_scan_exclusions should return no results
930 let result = cx
931 .update(|cx| {
932 let input = json!({
933 "regex": "special_configuration"
934 });
935 Arc::new(GrepTool)
936 .run(
937 input,
938 Arc::default(),
939 project.clone(),
940 action_log.clone(),
941 model.clone(),
942 None,
943 cx,
944 )
945 .output
946 })
947 .await;
948 let results = result.unwrap();
949 let paths = extract_paths_from_results(&results.content.as_str().unwrap());
950 assert!(
951 paths.is_empty(),
952 "grep_tool should not search files in .secretdir (file_scan_exclusions)"
953 );
954
955 let result = cx
956 .update(|cx| {
957 let input = json!({
958 "regex": "custom_metadata"
959 });
960 Arc::new(GrepTool)
961 .run(
962 input,
963 Arc::default(),
964 project.clone(),
965 action_log.clone(),
966 model.clone(),
967 None,
968 cx,
969 )
970 .output
971 })
972 .await;
973 let results = result.unwrap();
974 let paths = extract_paths_from_results(&results.content.as_str().unwrap());
975 assert!(
976 paths.is_empty(),
977 "grep_tool should not search .mymetadata files (file_scan_exclusions)"
978 );
979
980 // Searching private files should return no results
981 let result = cx
982 .update(|cx| {
983 let input = json!({
984 "regex": "SECRET_KEY"
985 });
986 Arc::new(GrepTool)
987 .run(
988 input,
989 Arc::default(),
990 project.clone(),
991 action_log.clone(),
992 model.clone(),
993 None,
994 cx,
995 )
996 .output
997 })
998 .await;
999 let results = result.unwrap();
1000 let paths = extract_paths_from_results(&results.content.as_str().unwrap());
1001 assert!(
1002 paths.is_empty(),
1003 "grep_tool should not search .mysecrets (private_files)"
1004 );
1005
1006 let result = cx
1007 .update(|cx| {
1008 let input = json!({
1009 "regex": "private_key_content"
1010 });
1011 Arc::new(GrepTool)
1012 .run(
1013 input,
1014 Arc::default(),
1015 project.clone(),
1016 action_log.clone(),
1017 model.clone(),
1018 None,
1019 cx,
1020 )
1021 .output
1022 })
1023 .await;
1024 let results = result.unwrap();
1025 let paths = extract_paths_from_results(&results.content.as_str().unwrap());
1026 assert!(
1027 paths.is_empty(),
1028 "grep_tool should not search .privatekey files (private_files)"
1029 );
1030
1031 let result = cx
1032 .update(|cx| {
1033 let input = json!({
1034 "regex": "sensitive_data"
1035 });
1036 Arc::new(GrepTool)
1037 .run(
1038 input,
1039 Arc::default(),
1040 project.clone(),
1041 action_log.clone(),
1042 model.clone(),
1043 None,
1044 cx,
1045 )
1046 .output
1047 })
1048 .await;
1049 let results = result.unwrap();
1050 let paths = extract_paths_from_results(&results.content.as_str().unwrap());
1051 assert!(
1052 paths.is_empty(),
1053 "grep_tool should not search .mysensitive files (private_files)"
1054 );
1055
1056 // Searching a normal file should still work, even with private_files configured
1057 let result = cx
1058 .update(|cx| {
1059 let input = json!({
1060 "regex": "normal_file_content"
1061 });
1062 Arc::new(GrepTool)
1063 .run(
1064 input,
1065 Arc::default(),
1066 project.clone(),
1067 action_log.clone(),
1068 model.clone(),
1069 None,
1070 cx,
1071 )
1072 .output
1073 })
1074 .await;
1075 let results = result.unwrap();
1076 let paths = extract_paths_from_results(&results.content.as_str().unwrap());
1077 assert!(
1078 paths.iter().any(|p| p.contains("normal_file.rs")),
1079 "Should be able to search normal files"
1080 );
1081
1082 // Path traversal attempts with .. in include_pattern should not escape project
1083 let result = cx
1084 .update(|cx| {
1085 let input = json!({
1086 "regex": "outside_function",
1087 "include_pattern": "../outside_project/**/*.rs"
1088 });
1089 Arc::new(GrepTool)
1090 .run(
1091 input,
1092 Arc::default(),
1093 project.clone(),
1094 action_log.clone(),
1095 model.clone(),
1096 None,
1097 cx,
1098 )
1099 .output
1100 })
1101 .await;
1102 let results = result.unwrap();
1103 let paths = extract_paths_from_results(&results.content.as_str().unwrap());
1104 assert!(
1105 paths.is_empty(),
1106 "grep_tool should not allow escaping project boundaries with relative paths"
1107 );
1108 }
1109
1110 #[gpui::test]
1111 async fn test_grep_with_multiple_worktree_settings(cx: &mut TestAppContext) {
1112 init_test(cx);
1113
1114 let fs = FakeFs::new(cx.executor());
1115
1116 // Create first worktree with its own private files
1117 fs.insert_tree(
1118 path!("/worktree1"),
1119 json!({
1120 ".zed": {
1121 "settings.json": r#"{
1122 "file_scan_exclusions": ["**/fixture.*"],
1123 "private_files": ["**/secret.rs"]
1124 }"#
1125 },
1126 "src": {
1127 "main.rs": "fn main() { let secret_key = \"hidden\"; }",
1128 "secret.rs": "const API_KEY: &str = \"secret_value\";",
1129 "utils.rs": "pub fn get_config() -> String { \"config\".to_string() }"
1130 },
1131 "tests": {
1132 "test.rs": "fn test_secret() { assert!(true); }",
1133 "fixture.sql": "SELECT * FROM secret_table;"
1134 }
1135 }),
1136 )
1137 .await;
1138
1139 // Create second worktree with different private files
1140 fs.insert_tree(
1141 path!("/worktree2"),
1142 json!({
1143 ".zed": {
1144 "settings.json": r#"{
1145 "file_scan_exclusions": ["**/internal.*"],
1146 "private_files": ["**/private.js", "**/data.json"]
1147 }"#
1148 },
1149 "lib": {
1150 "public.js": "export function getSecret() { return 'public'; }",
1151 "private.js": "const SECRET_KEY = \"private_value\";",
1152 "data.json": "{\"secret_data\": \"hidden\"}"
1153 },
1154 "docs": {
1155 "README.md": "# Documentation with secret info",
1156 "internal.md": "Internal secret documentation"
1157 }
1158 }),
1159 )
1160 .await;
1161
1162 // Set global settings
1163 cx.update(|cx| {
1164 SettingsStore::update_global(cx, |store, cx| {
1165 store.update_user_settings::<WorktreeSettings>(cx, |settings| {
1166 settings.file_scan_exclusions =
1167 Some(vec!["**/.git".to_string(), "**/node_modules".to_string()]);
1168 settings.private_files = Some(vec!["**/.env".to_string()]);
1169 });
1170 });
1171 });
1172
1173 let project = Project::test(
1174 fs.clone(),
1175 [path!("/worktree1").as_ref(), path!("/worktree2").as_ref()],
1176 cx,
1177 )
1178 .await;
1179
1180 // Wait for worktrees to be fully scanned
1181 cx.executor().run_until_parked();
1182
1183 let action_log = cx.new(|_| ActionLog::new(project.clone()));
1184 let model = Arc::new(FakeLanguageModel::default());
1185
1186 // Search for "secret" - should exclude files based on worktree-specific settings
1187 let result = cx
1188 .update(|cx| {
1189 let input = json!({
1190 "regex": "secret",
1191 "case_sensitive": false
1192 });
1193 Arc::new(GrepTool)
1194 .run(
1195 input,
1196 Arc::default(),
1197 project.clone(),
1198 action_log.clone(),
1199 model.clone(),
1200 None,
1201 cx,
1202 )
1203 .output
1204 })
1205 .await
1206 .unwrap();
1207
1208 let content = result.content.as_str().unwrap();
1209 let paths = extract_paths_from_results(&content);
1210
1211 // Should find matches in non-private files
1212 assert!(
1213 paths.iter().any(|p| p.contains("main.rs")),
1214 "Should find 'secret' in worktree1/src/main.rs"
1215 );
1216 assert!(
1217 paths.iter().any(|p| p.contains("test.rs")),
1218 "Should find 'secret' in worktree1/tests/test.rs"
1219 );
1220 assert!(
1221 paths.iter().any(|p| p.contains("public.js")),
1222 "Should find 'secret' in worktree2/lib/public.js"
1223 );
1224 assert!(
1225 paths.iter().any(|p| p.contains("README.md")),
1226 "Should find 'secret' in worktree2/docs/README.md"
1227 );
1228
1229 // Should NOT find matches in private/excluded files based on worktree settings
1230 assert!(
1231 !paths.iter().any(|p| p.contains("secret.rs")),
1232 "Should not search in worktree1/src/secret.rs (local private_files)"
1233 );
1234 assert!(
1235 !paths.iter().any(|p| p.contains("fixture.sql")),
1236 "Should not search in worktree1/tests/fixture.sql (local file_scan_exclusions)"
1237 );
1238 assert!(
1239 !paths.iter().any(|p| p.contains("private.js")),
1240 "Should not search in worktree2/lib/private.js (local private_files)"
1241 );
1242 assert!(
1243 !paths.iter().any(|p| p.contains("data.json")),
1244 "Should not search in worktree2/lib/data.json (local private_files)"
1245 );
1246 assert!(
1247 !paths.iter().any(|p| p.contains("internal.md")),
1248 "Should not search in worktree2/docs/internal.md (local file_scan_exclusions)"
1249 );
1250
1251 // Test with `include_pattern` specific to one worktree
1252 let result = cx
1253 .update(|cx| {
1254 let input = json!({
1255 "regex": "secret",
1256 "include_pattern": "worktree1/**/*.rs"
1257 });
1258 Arc::new(GrepTool)
1259 .run(
1260 input,
1261 Arc::default(),
1262 project.clone(),
1263 action_log.clone(),
1264 model.clone(),
1265 None,
1266 cx,
1267 )
1268 .output
1269 })
1270 .await
1271 .unwrap();
1272
1273 let content = result.content.as_str().unwrap();
1274 let paths = extract_paths_from_results(&content);
1275
1276 // Should only find matches in worktree1 *.rs files (excluding private ones)
1277 assert!(
1278 paths.iter().any(|p| p.contains("main.rs")),
1279 "Should find match in worktree1/src/main.rs"
1280 );
1281 assert!(
1282 paths.iter().any(|p| p.contains("test.rs")),
1283 "Should find match in worktree1/tests/test.rs"
1284 );
1285 assert!(
1286 !paths.iter().any(|p| p.contains("secret.rs")),
1287 "Should not find match in excluded worktree1/src/secret.rs"
1288 );
1289 assert!(
1290 paths.iter().all(|p| !p.contains("worktree2")),
1291 "Should not find any matches in worktree2"
1292 );
1293 }
1294
/// Collects the file paths named in the `## Matches in <path>` header lines of grep
/// output, in order of appearance and with surrounding whitespace trimmed.
fn extract_paths_from_results(results: &str) -> Vec<String> {
    const HEADER: &str = "## Matches in ";

    let mut paths = Vec::new();
    for line in results.lines() {
        if let Some(path) = line.strip_prefix(HEADER) {
            paths.push(path.trim().to_string());
        }
    }
    paths
}
1308}