read_file_tool.rs

   1use action_log::ActionLog;
   2use agent_client_protocol::{self as acp, ToolCallUpdateFields};
   3use anyhow::{Context as _, Result, anyhow};
   4use gpui::{App, Entity, SharedString, Task, WeakEntity};
   5use indoc::formatdoc;
   6use language::Point;
   7use language_model::{LanguageModelImage, LanguageModelToolResultContent};
   8use project::{AgentLocation, ImageItem, Project, WorktreeSettings, image_store};
   9use schemars::JsonSchema;
  10use serde::{Deserialize, Serialize};
  11use settings::Settings;
  12use std::sync::Arc;
  13use text::OffsetRangeExt as _;
  14use util::markdown::MarkdownCodeBlock;
  15
  16use crate::{AgentTool, Thread, ToolCallEventStream, outline};
  17
  18const DEFAULT_MAX_BYTES: usize = 64 * 1024;
  19const HARD_MAX_BYTES: usize = 256 * 1024;
  20const MAX_SYNTAX_EXPANSION_ROWS: u32 = 500;
  21
  22/// Reads the content of the given file in the project.
  23///
  24/// - Never attempt to read a path that hasn't been previously mentioned.
  25/// - For large files, this tool returns a file outline with symbol names and line numbers instead of the full content.
  26///   This outline IS a successful response - use the line numbers to read specific sections with start_line/end_line.
  27///   Do NOT retry reading the same file without line numbers if you receive an outline.
  28///
  29/// This tool supports two ways of reading text:
  30///
  31/// - **Line range mode**: provide `start_line` and/or `end_line` (1-based, inclusive end).
  32/// - **Byte window mode**: provide `start_byte` and/or `max_bytes` (0-based byte offsets).
  33///   Byte window results are rounded to whole line boundaries, prefer syntactic expansion when available,
  34///   and are bounded by a server-side hard cap.
  35///
  36/// Byte window mode is intended for efficient paging and reducing repeated small reads. When used,
  37/// the returned content includes a brief header with the requested/rounded/returned byte ranges and line range,
  38/// which can be used to choose the next `start_byte` deterministically.
  39#[derive(Debug, Serialize, Deserialize, JsonSchema)]
  40pub struct ReadFileToolInput {
  41    /// The relative path of the file to read.
  42    ///
  43    /// This path should never be absolute, and the first component of the path should always be a root directory in a project.
  44    ///
  45    /// <example>
  46    /// If the project has the following root directories:
  47    ///
  48    /// - /a/b/directory1
  49    /// - /c/d/directory2
  50    ///
  51    /// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
  52    /// If you want to access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.
  53    /// </example>
  54    pub path: String,
  55    /// Optional line number to start reading on (1-based index)
  56    #[serde(default)]
  57    pub start_line: Option<u32>,
  58    /// Optional line number to end reading on (1-based index, inclusive)
  59    #[serde(default)]
  60    pub end_line: Option<u32>,
  61
  62    /// Optional byte offset to start reading on (0-based index).
  63    ///
  64    /// When provided (or when `max_bytes` is provided), this call uses **byte window mode**.
  65    /// The returned content is rounded to whole line boundaries (no partial lines).
  66    ///
  67    /// For efficient paging, use the byte-range header included in byte window outputs to choose the next
  68    /// `start_byte` deterministically.
  69    #[serde(default)]
  70    pub start_byte: Option<u64>,
  71
  72    /// Optional maximum number of bytes to read.
  73    ///
  74    /// When provided (or when `start_byte` is provided), this call uses **byte window mode**.
  75    /// The requested size is bounded by a server-side hard cap.
  76    ///
  77    /// Prefer setting a larger `max_bytes` (up to the hard cap) when you expect to read adjacent sections, to reduce
  78    /// repeated paging calls.
  79    #[serde(default)]
  80    pub max_bytes: Option<u32>,
  81}
  82
  83pub struct ReadFileTool {
  84    thread: WeakEntity<Thread>,
  85    project: Entity<Project>,
  86    action_log: Entity<ActionLog>,
  87}
  88
  89impl ReadFileTool {
  90    pub fn new(
  91        thread: WeakEntity<Thread>,
  92        project: Entity<Project>,
  93        action_log: Entity<ActionLog>,
  94    ) -> Self {
  95        Self {
  96            thread,
  97            project,
  98            action_log,
  99        }
 100    }
 101}
 102
 103impl AgentTool for ReadFileTool {
 104    type Input = ReadFileToolInput;
 105    type Output = LanguageModelToolResultContent;
 106
 107    fn name() -> &'static str {
 108        "read_file"
 109    }
 110
 111    fn kind() -> acp::ToolKind {
 112        acp::ToolKind::Read
 113    }
 114
 115    fn initial_title(
 116        &self,
 117        input: Result<Self::Input, serde_json::Value>,
 118        cx: &mut App,
 119    ) -> SharedString {
 120        if let Ok(input) = input
 121            && let Some(project_path) = self.project.read(cx).find_project_path(&input.path, cx)
 122            && let Some(path) = self
 123                .project
 124                .read(cx)
 125                .short_full_path_for_project_path(&project_path, cx)
 126        {
 127            match (input.start_line, input.end_line) {
 128                (Some(start), Some(end)) => {
 129                    format!("Read file `{path}` (lines {}-{})", start, end,)
 130                }
 131                (Some(start), None) => {
 132                    format!("Read file `{path}` (from line {})", start)
 133                }
 134                _ => format!("Read file `{path}`"),
 135            }
 136            .into()
 137        } else {
 138            "Read file".into()
 139        }
 140    }
 141
 142    fn run(
 143        self: Arc<Self>,
 144        input: Self::Input,
 145        event_stream: ToolCallEventStream,
 146        cx: &mut App,
 147    ) -> Task<Result<LanguageModelToolResultContent>> {
 148        let Some(project_path) = self.project.read(cx).find_project_path(&input.path, cx) else {
 149            return Task::ready(Err(anyhow!("Path {} not found in project", &input.path)));
 150        };
 151        let Some(abs_path) = self.project.read(cx).absolute_path(&project_path, cx) else {
 152            return Task::ready(Err(anyhow!(
 153                "Failed to convert {} to absolute path",
 154                &input.path
 155            )));
 156        };
 157
 158        // Error out if this path is either excluded or private in global settings
 159        let global_settings = WorktreeSettings::get_global(cx);
 160        if global_settings.is_path_excluded(&project_path.path) {
 161            return Task::ready(Err(anyhow!(
 162                "Cannot read file because its path matches the global `file_scan_exclusions` setting: {}",
 163                &input.path
 164            )));
 165        }
 166
 167        if global_settings.is_path_private(&project_path.path) {
 168            return Task::ready(Err(anyhow!(
 169                "Cannot read file because its path matches the global `private_files` setting: {}",
 170                &input.path
 171            )));
 172        }
 173
 174        // Error out if this path is either excluded or private in worktree settings
 175        let worktree_settings = WorktreeSettings::get(Some((&project_path).into()), cx);
 176        if worktree_settings.is_path_excluded(&project_path.path) {
 177            return Task::ready(Err(anyhow!(
 178                "Cannot read file because its path matches the worktree `file_scan_exclusions` setting: {}",
 179                &input.path
 180            )));
 181        }
 182
 183        if worktree_settings.is_path_private(&project_path.path) {
 184            return Task::ready(Err(anyhow!(
 185                "Cannot read file because its path matches the worktree `private_files` setting: {}",
 186                &input.path
 187            )));
 188        }
 189
 190        let file_path = input.path.clone();
 191
 192        event_stream.update_fields(ToolCallUpdateFields::new().locations(vec![
 193                acp::ToolCallLocation::new(&abs_path)
 194                    .line(input.start_line.map(|line| line.saturating_sub(1))),
 195            ]));
 196
 197        if image_store::is_image_file(&self.project, &project_path, cx) {
 198            return cx.spawn(async move |cx| {
 199                let image_entity: Entity<ImageItem> = cx
 200                    .update(|cx| {
 201                        self.project.update(cx, |project, cx| {
 202                            project.open_image(project_path.clone(), cx)
 203                        })
 204                    })?
 205                    .await?;
 206
 207                let image =
 208                    image_entity.read_with(cx, |image_item, _| Arc::clone(&image_item.image))?;
 209
 210                let language_model_image = cx
 211                    .update(|cx| LanguageModelImage::from_image(image, cx))?
 212                    .await
 213                    .context("processing image")?;
 214
 215                Ok(language_model_image.into())
 216            });
 217        }
 218
 219        let project = self.project.clone();
 220        let action_log = self.action_log.clone();
 221
 222        cx.spawn(async move |cx| {
 223            let buffer = cx
 224                .update(|cx| {
 225                    project.update(cx, |project, cx| {
 226                        project.open_buffer(project_path.clone(), cx)
 227                    })
 228                })?
 229                .await?;
 230            if buffer.read_with(cx, |buffer, _| {
 231                buffer
 232                    .file()
 233                    .as_ref()
 234                    .is_none_or(|file| !file.disk_state().exists())
 235            })? {
 236                anyhow::bail!("{file_path} not found");
 237            }
 238
 239            // Record the file read time and mtime
 240            if let Some(mtime) = buffer.read_with(cx, |buffer, _| {
 241                buffer.file().and_then(|file| file.disk_state().mtime())
 242            })? {
 243                self.thread
 244                    .update(cx, |thread, _| {
 245                        thread.file_read_times.insert(abs_path.to_path_buf(), mtime);
 246                    })
 247                    .ok();
 248            }
 249
 250            let mut anchor = None;
 251
 252            // Check if specific line ranges are provided
 253            let result = if input.start_line.is_some() || input.end_line.is_some() {
 254                let result = buffer.read_with(cx, |buffer, _cx| {
 255                    // .max(1) because despite instructions to be 1-indexed, sometimes the model passes 0.
 256                    let start = input.start_line.unwrap_or(1).max(1);
 257                    let start_row = start - 1;
 258                    if start_row <= buffer.max_point().row {
 259                        let column = buffer.line_indent_for_row(start_row).raw_len();
 260                        anchor = Some(buffer.anchor_before(Point::new(start_row, column)));
 261                    }
 262
 263                    let mut end_row = input.end_line.unwrap_or(u32::MAX);
 264                    if end_row <= start_row {
 265                        end_row = start_row + 1; // read at least one lines
 266                    }
 267                    let start = buffer.anchor_before(Point::new(start_row, 0));
 268                    let end = buffer.anchor_before(Point::new(end_row, 0));
 269                    buffer.text_for_range(start..end).collect::<String>()
 270                })?;
 271
 272                action_log.update(cx, |log, cx| {
 273                    log.buffer_read(buffer.clone(), cx);
 274                })?;
 275
 276                Ok(result.into())
 277            } else if input.start_byte.is_some() || input.max_bytes.is_some() {
 278                let (window_text, window_anchor) = buffer.read_with(cx, |buffer, _cx| {
 279                    let snapshot = buffer.snapshot();
 280
 281                    let requested_start_offset = input
 282                        .start_byte
 283                        .unwrap_or(0)
 284                        .min(snapshot.len() as u64) as usize;
 285
 286                    let requested_len = input
 287                        .max_bytes
 288                        .map(|bytes| bytes as usize)
 289                        .unwrap_or(DEFAULT_MAX_BYTES);
 290
 291                    let requested_len = requested_len.min(HARD_MAX_BYTES);
 292
 293                    let requested_start_offset =
 294                        snapshot.as_rope().floor_char_boundary(requested_start_offset);
 295                    let requested_end_offset = snapshot
 296                        .as_rope()
 297                        .floor_char_boundary(
 298                            requested_start_offset
 299                                .saturating_add(requested_len)
 300                                .min(snapshot.len()),
 301                        );
 302
 303                    let requested_byte_range = requested_start_offset..requested_end_offset;
 304                    let mut range = requested_byte_range.to_point(&snapshot);
 305
 306                    // Round to line boundaries: no partial lines.
 307                    range.start.column = 0;
 308                    range.end.column = snapshot.line_len(range.end.row);
 309
 310                    let rounded_byte_range = range.to_offset(&snapshot);
 311
 312                    // Prefer syntactic expansion (clean boundaries) when available, but only if it stays bounded.
 313                    let mut used_syntactic_expansion = false;
 314                    if let Some(ancestor_node) = snapshot.syntax_ancestor(range.clone()) {
 315                        let mut ancestor_range = ancestor_node.byte_range().to_point(&snapshot);
 316                        ancestor_range.start.column = 0;
 317
 318                        let max_end_row = (ancestor_range.start.row + MAX_SYNTAX_EXPANSION_ROWS)
 319                            .min(snapshot.max_point().row);
 320                        let capped_end_row = ancestor_range.end.row.min(max_end_row);
 321                        ancestor_range.end =
 322                            Point::new(capped_end_row, snapshot.line_len(capped_end_row));
 323
 324                        let ancestor_byte_range = ancestor_range.to_offset(&snapshot);
 325                        if ancestor_byte_range.len() <= HARD_MAX_BYTES {
 326                            range = ancestor_range;
 327                            used_syntactic_expansion = true;
 328                        }
 329                    }
 330
 331                    let effective_byte_range = range.to_offset(&snapshot);
 332
 333                    let start_anchor = buffer.anchor_before(Point::new(range.start.row, 0));
 334                    let end_row_exclusive = (range.end.row + 1).min(snapshot.max_point().row + 1);
 335                    let end_anchor = buffer.anchor_before(Point::new(end_row_exclusive, 0));
 336                    let mut text = buffer.text_for_range(start_anchor..end_anchor).collect::<String>();
 337
 338                    let mut header = String::new();
 339                    header.push_str("SUCCESS: Byte-window read.\n");
 340                    header.push_str(&format!(
 341                        "Requested bytes: [{}-{}) (len {})\n",
 342                        requested_byte_range.start,
 343                        requested_byte_range.end,
 344                        requested_byte_range.len()
 345                    ));
 346                    header.push_str(&format!(
 347                        "Rounded bytes:   [{}-{}) (len {})\n",
 348                        rounded_byte_range.start,
 349                        rounded_byte_range.end,
 350                        rounded_byte_range.len()
 351                    ));
 352                    header.push_str(&format!(
 353                        "Returned bytes:  [{}-{}) (len {})\n",
 354                        effective_byte_range.start,
 355                        effective_byte_range.end,
 356                        effective_byte_range.len()
 357                    ));
 358                    header.push_str(&format!(
 359                        "Returned lines:  {}-{}\n",
 360                        range.start.row + 1,
 361                        range.end.row + 1
 362                    ));
 363                    header.push_str(&format!(
 364                        "Syntactic expansion: {}\n\n",
 365                        if used_syntactic_expansion { "yes" } else { "no" }
 366                    ));
 367
 368                    // Enforce a hard output cap. If the chosen range expanded beyond the cap (e.g. large lines),
 369                    // fall back to the rounded byte window and cap to HARD_MAX_BYTES on a UTF-8 boundary.
 370                    if effective_byte_range.len() > HARD_MAX_BYTES {
 371                        let fallback_end = snapshot.as_rope().floor_char_boundary(
 372                            (rounded_byte_range.start + HARD_MAX_BYTES).min(snapshot.len()),
 373                        );
 374                        let fallback_range =
 375                            (rounded_byte_range.start..fallback_end).to_point(&snapshot);
 376
 377                        let fallback_start_anchor =
 378                            buffer.anchor_before(Point::new(fallback_range.start.row, 0));
 379                        let fallback_end_anchor =
 380                            buffer.anchor_before(Point::new(fallback_range.end.row, 0));
 381                        text = buffer
 382                            .text_for_range(fallback_start_anchor..fallback_end_anchor)
 383                            .collect::<String>();
 384
 385                        header.push_str(
 386                            "NOTE: Returned content exceeded the hard cap after rounding/expansion; \
 387falling back to a capped byte window.\n\n",
 388                        );
 389                    }
 390
 391                    (format!("{header}{text}"), Some(start_anchor))
 392                })?;
 393
 394                if let Some(a) = window_anchor {
 395                    anchor = Some(a);
 396                }
 397
 398                action_log.update(cx, |log, cx| {
 399                    log.buffer_read(buffer.clone(), cx);
 400                })?;
 401
 402                Ok(window_text.into())
 403            } else {
 404                // No line ranges specified, so check file size to see if it's too big.
 405                let buffer_content = outline::get_buffer_content_or_outline(
 406                    buffer.clone(),
 407                    Some(&abs_path.to_string_lossy()),
 408                    cx,
 409                )
 410                .await?;
 411
 412                action_log.update(cx, |log, cx| {
 413                    log.buffer_read(buffer.clone(), cx);
 414                })?;
 415
 416                if buffer_content.is_outline {
 417                    Ok(formatdoc! {"
 418                        SUCCESS: File outline retrieved. This file is too large to read all at once, so the outline below shows the file's structure with line numbers.
 419
 420                        IMPORTANT: Do NOT retry this call without line numbers - you will get the same outline.
 421                        Instead, use the line numbers below to read specific sections by calling this tool again with start_line and end_line parameters.
 422
 423                        {}
 424
 425                        NEXT STEPS: To read a specific symbol's implementation, call read_file with the same path plus start_line and end_line from the outline above.
 426                        For example, to read a function shown as [L100-150], use start_line: 100 and end_line: 150.", buffer_content.text
 427                    }
 428                    .into())
 429                } else {
 430                    Ok(buffer_content.text.into())
 431                }
 432            };
 433
 434            project.update(cx, |project, cx| {
 435                project.set_agent_location(
 436                    Some(AgentLocation {
 437                        buffer: buffer.downgrade(),
 438                        position: anchor.unwrap_or_else(|| {
 439                            text::Anchor::min_for_buffer(buffer.read(cx).remote_id())
 440                        }),
 441                    }),
 442                    cx,
 443                );
 444                if let Ok(LanguageModelToolResultContent::Text(text)) = &result {
 445                    let markdown = MarkdownCodeBlock {
 446                        tag: &input.path,
 447                        text,
 448                    }
 449                    .to_string();
 450                    event_stream.update_fields(ToolCallUpdateFields::new().content(vec![
 451                        acp::ToolCallContent::Content(acp::Content::new(markdown)),
 452                    ]));
 453                }
 454            })?;
 455
 456            result
 457        })
 458    }
 459}
 460
 461#[cfg(test)]
 462mod test {
 463    use super::*;
 464    use crate::{ContextServerRegistry, Templates, Thread};
 465    use gpui::{AppContext, TestAppContext, UpdateGlobal as _};
 466    use language_model::fake_provider::FakeLanguageModel;
 467    use project::{FakeFs, Project};
 468    use prompt_store::ProjectContext;
 469    use serde_json::json;
 470    use settings::SettingsStore;
 471    use std::sync::Arc;
 472    use util::path;
 473
 474    #[gpui::test]
 475    async fn test_read_nonexistent_file(cx: &mut TestAppContext) {
 476        init_test(cx);
 477
 478        let fs = FakeFs::new(cx.executor());
 479        fs.insert_tree(path!("/root"), json!({})).await;
 480        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
 481        let action_log = cx.new(|_| ActionLog::new(project.clone()));
 482        let context_server_registry =
 483            cx.new(|cx| ContextServerRegistry::new(project.read(cx).context_server_store(), cx));
 484        let model = Arc::new(FakeLanguageModel::default());
 485        let thread = cx.new(|cx| {
 486            Thread::new(
 487                project.clone(),
 488                cx.new(|_cx| ProjectContext::default()),
 489                context_server_registry,
 490                Templates::new(),
 491                Some(model),
 492                cx,
 493            )
 494        });
 495        let tool = Arc::new(ReadFileTool::new(thread.downgrade(), project, action_log));
 496        let (event_stream, _) = ToolCallEventStream::test();
 497
 498        let result = cx
 499            .update(|cx| {
 500                let input = ReadFileToolInput {
 501                    path: "root/nonexistent_file.txt".to_string(),
 502                    start_line: None,
 503                    end_line: None,
 504                    start_byte: None,
 505                    max_bytes: None,
 506                };
 507                tool.run(input, event_stream, cx)
 508            })
 509            .await;
 510        assert_eq!(
 511            result.unwrap_err().to_string(),
 512            "root/nonexistent_file.txt not found"
 513        );
 514    }
 515
 516    #[gpui::test]
 517    async fn test_read_small_file(cx: &mut TestAppContext) {
 518        init_test(cx);
 519
 520        let fs = FakeFs::new(cx.executor());
 521        fs.insert_tree(
 522            path!("/root"),
 523            json!({
 524                "small_file.txt": "This is a small file content"
 525            }),
 526        )
 527        .await;
 528        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
 529        let action_log = cx.new(|_| ActionLog::new(project.clone()));
 530        let context_server_registry =
 531            cx.new(|cx| ContextServerRegistry::new(project.read(cx).context_server_store(), cx));
 532        let model = Arc::new(FakeLanguageModel::default());
 533        let thread = cx.new(|cx| {
 534            Thread::new(
 535                project.clone(),
 536                cx.new(|_cx| ProjectContext::default()),
 537                context_server_registry,
 538                Templates::new(),
 539                Some(model),
 540                cx,
 541            )
 542        });
 543        let tool = Arc::new(ReadFileTool::new(thread.downgrade(), project, action_log));
 544        let result = cx
 545            .update(|cx| {
 546                let input = ReadFileToolInput {
 547                    path: "root/small_file.txt".into(),
 548                    start_line: None,
 549                    end_line: None,
 550                    start_byte: None,
 551                    max_bytes: None,
 552                };
 553                tool.run(input, ToolCallEventStream::test().0, cx)
 554            })
 555            .await;
 556        assert_eq!(result.unwrap(), "This is a small file content".into());
 557    }
 558
 559    #[gpui::test]
 560    async fn test_read_large_file(cx: &mut TestAppContext) {
 561        init_test(cx);
 562
 563        let fs = FakeFs::new(cx.executor());
 564        fs.insert_tree(
 565            path!("/root"),
 566            json!({
 567                "large_file.rs": (0..1000).map(|i| format!("struct Test{} {{\n    a: u32,\n    b: usize,\n}}", i)).collect::<Vec<_>>().join("\n")
 568            }),
 569        )
 570        .await;
 571        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
 572        let language_registry = project.read_with(cx, |project, _| project.languages().clone());
 573        language_registry.add(language::rust_lang());
 574        let action_log = cx.new(|_| ActionLog::new(project.clone()));
 575        let context_server_registry =
 576            cx.new(|cx| ContextServerRegistry::new(project.read(cx).context_server_store(), cx));
 577        let model = Arc::new(FakeLanguageModel::default());
 578        let thread = cx.new(|cx| {
 579            Thread::new(
 580                project.clone(),
 581                cx.new(|_cx| ProjectContext::default()),
 582                context_server_registry,
 583                Templates::new(),
 584                Some(model),
 585                cx,
 586            )
 587        });
 588        let tool = Arc::new(ReadFileTool::new(thread.downgrade(), project, action_log));
 589        let result = cx
 590            .update(|cx| {
 591                let input = ReadFileToolInput {
 592                    path: "root/large_file.rs".into(),
 593                    start_line: None,
 594                    end_line: None,
 595                    start_byte: None,
 596                    max_bytes: None,
 597                };
 598                tool.clone().run(input, ToolCallEventStream::test().0, cx)
 599            })
 600            .await
 601            .unwrap();
 602        let content = result.to_str().unwrap();
 603
 604        assert_eq!(
 605            content.lines().skip(7).take(6).collect::<Vec<_>>(),
 606            vec![
 607                "struct Test0 [L1-4]",
 608                " a [L2]",
 609                " b [L3]",
 610                "struct Test1 [L5-8]",
 611                " a [L6]",
 612                " b [L7]",
 613            ]
 614        );
 615
 616        let result = cx
 617            .update(|cx| {
 618                let input = ReadFileToolInput {
 619                    path: "root/large_file.rs".into(),
 620                    start_line: None,
 621                    end_line: None,
 622                    start_byte: None,
 623                    max_bytes: None,
 624                };
 625                tool.run(input, ToolCallEventStream::test().0, cx)
 626            })
 627            .await
 628            .unwrap();
 629        let content = result.to_str().unwrap();
 630        let expected_content = (0..1000)
 631            .flat_map(|i| {
 632                vec![
 633                    format!("struct Test{} [L{}-{}]", i, i * 4 + 1, i * 4 + 4),
 634                    format!(" a [L{}]", i * 4 + 2),
 635                    format!(" b [L{}]", i * 4 + 3),
 636                ]
 637            })
 638            .collect::<Vec<_>>();
 639        pretty_assertions::assert_eq!(
 640            content
 641                .lines()
 642                .skip(7)
 643                .take(expected_content.len())
 644                .collect::<Vec<_>>(),
 645            expected_content
 646        );
 647    }
 648
 649    #[gpui::test]
 650    async fn test_read_file_with_line_range(cx: &mut TestAppContext) {
 651        init_test(cx);
 652
 653        let fs = FakeFs::new(cx.executor());
 654        fs.insert_tree(
 655            path!("/root"),
 656            json!({
 657                "multiline.txt": "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
 658            }),
 659        )
 660        .await;
 661        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
 662
 663        let action_log = cx.new(|_| ActionLog::new(project.clone()));
 664        let context_server_registry =
 665            cx.new(|cx| ContextServerRegistry::new(project.read(cx).context_server_store(), cx));
 666        let model = Arc::new(FakeLanguageModel::default());
 667        let thread = cx.new(|cx| {
 668            Thread::new(
 669                project.clone(),
 670                cx.new(|_cx| ProjectContext::default()),
 671                context_server_registry,
 672                Templates::new(),
 673                Some(model),
 674                cx,
 675            )
 676        });
 677        let tool = Arc::new(ReadFileTool::new(thread.downgrade(), project, action_log));
 678        let result = cx
 679            .update(|cx| {
 680                let input = ReadFileToolInput {
 681                    path: "root/multiline.txt".to_string(),
 682                    start_line: Some(2),
 683                    end_line: Some(4),
 684                    start_byte: None,
 685                    max_bytes: None,
 686                };
 687                tool.run(input, ToolCallEventStream::test().0, cx)
 688            })
 689            .await;
 690        assert_eq!(result.unwrap(), "Line 2\nLine 3\nLine 4\n".into());
 691    }
 692
 693    #[gpui::test]
 694    async fn test_read_file_with_byte_window_rounds_to_whole_lines(cx: &mut TestAppContext) {
 695        init_test(cx);
 696
 697        let fs = FakeFs::new(cx.executor());
 698        fs.insert_tree(
 699            path!("/root"),
 700            json!({
 701                "multiline.txt": "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
 702            }),
 703        )
 704        .await;
 705        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
 706
 707        let action_log = cx.new(|_| ActionLog::new(project.clone()));
 708        let context_server_registry =
 709            cx.new(|cx| ContextServerRegistry::new(project.read(cx).context_server_store(), cx));
 710        let model = Arc::new(FakeLanguageModel::default());
 711        let thread = cx.new(|cx| {
 712            Thread::new(
 713                project.clone(),
 714                cx.new(|_cx| ProjectContext::default()),
 715                context_server_registry,
 716                Templates::new(),
 717                Some(model),
 718                cx,
 719            )
 720        });
 721        let tool = Arc::new(ReadFileTool::new(thread.downgrade(), project, action_log));
 722
 723        // Request a byte window that starts in the middle of "Line 2", which should round to whole lines.
 724        let line_1 = "Line 1\n";
 725        let start_byte = (line_1.len() + 2) as u64;
 726
 727        let result = cx
 728            .update(|cx| {
 729                let input = ReadFileToolInput {
 730                    path: "root/multiline.txt".to_string(),
 731                    start_line: None,
 732                    end_line: None,
 733                    start_byte: Some(start_byte),
 734                    max_bytes: Some(6),
 735                };
 736                tool.run(input, ToolCallEventStream::test().0, cx)
 737            })
 738            .await
 739            .unwrap()
 740            .to_str()
 741            .unwrap()
 742            .to_string();
 743
 744        assert!(
 745            result.contains("Line 2\n"),
 746            "Expected rounded output to include full line 2, got: {result:?}"
 747        );
 748        assert!(
 749            result.ends_with('\n'),
 750            "Expected rounded output to end on a line boundary, got: {result:?}"
 751        );
 752        assert!(
 753            result.contains("\n\nLine 2\n"),
 754            "Expected rounded output to include full line 2 after the byte-window header, got: {result:?}"
 755        );
 756    }
 757
 758    #[gpui::test]
 759    async fn test_read_file_line_range_edge_cases(cx: &mut TestAppContext) {
 760        init_test(cx);
 761
 762        let fs = FakeFs::new(cx.executor());
 763        fs.insert_tree(
 764            path!("/root"),
 765            json!({
 766                "multiline.txt": "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
 767            }),
 768        )
 769        .await;
 770        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
 771        let action_log = cx.new(|_| ActionLog::new(project.clone()));
 772        let context_server_registry =
 773            cx.new(|cx| ContextServerRegistry::new(project.read(cx).context_server_store(), cx));
 774        let model = Arc::new(FakeLanguageModel::default());
 775        let thread = cx.new(|cx| {
 776            Thread::new(
 777                project.clone(),
 778                cx.new(|_cx| ProjectContext::default()),
 779                context_server_registry,
 780                Templates::new(),
 781                Some(model),
 782                cx,
 783            )
 784        });
 785        let tool = Arc::new(ReadFileTool::new(thread.downgrade(), project, action_log));
 786
 787        // start_line of 0 should be treated as 1
 788        let result = cx
 789            .update(|cx| {
 790                let input = ReadFileToolInput {
 791                    path: "root/multiline.txt".to_string(),
 792                    start_line: Some(0),
 793                    end_line: Some(2),
 794                    start_byte: None,
 795                    max_bytes: None,
 796                };
 797                tool.clone().run(input, ToolCallEventStream::test().0, cx)
 798            })
 799            .await;
 800        assert_eq!(result.unwrap(), "Line 1\nLine 2\n".into());
 801
 802        // end_line of 0 should result in at least 1 line
 803        let result = cx
 804            .update(|cx| {
 805                let input = ReadFileToolInput {
 806                    path: "root/multiline.txt".to_string(),
 807                    start_line: Some(1),
 808                    end_line: Some(0),
 809                    start_byte: None,
 810                    max_bytes: None,
 811                };
 812                tool.clone().run(input, ToolCallEventStream::test().0, cx)
 813            })
 814            .await;
 815        assert_eq!(result.unwrap(), "Line 1\n".into());
 816
 817        // when start_line > end_line, should still return at least 1 line
 818        let result = cx
 819            .update(|cx| {
 820                let input = ReadFileToolInput {
 821                    path: "root/multiline.txt".to_string(),
 822                    start_line: Some(3),
 823                    end_line: Some(2),
 824                    start_byte: None,
 825                    max_bytes: None,
 826                };
 827                tool.clone().run(input, ToolCallEventStream::test().0, cx)
 828            })
 829            .await;
 830        assert_eq!(result.unwrap(), "Line 3\n".into());
 831    }
 832
 833    fn init_test(cx: &mut TestAppContext) {
 834        cx.update(|cx| {
 835            let settings_store = SettingsStore::test(cx);
 836            cx.set_global(settings_store);
 837        });
 838    }
 839
 840    #[gpui::test]
 841    async fn test_read_file_security(cx: &mut TestAppContext) {
 842        init_test(cx);
 843
 844        let fs = FakeFs::new(cx.executor());
 845
 846        fs.insert_tree(
 847            path!("/"),
 848            json!({
 849                "project_root": {
 850                    "allowed_file.txt": "This file is in the project",
 851                    ".mysecrets": "SECRET_KEY=abc123",
 852                    ".secretdir": {
 853                        "config": "special configuration"
 854                    },
 855                    ".mymetadata": "custom metadata",
 856                    "subdir": {
 857                        "normal_file.txt": "Normal file content",
 858                        "special.privatekey": "private key content",
 859                        "data.mysensitive": "sensitive data"
 860                    }
 861                },
 862                "outside_project": {
 863                    "sensitive_file.txt": "This file is outside the project"
 864                }
 865            }),
 866        )
 867        .await;
 868
 869        cx.update(|cx| {
 870            use gpui::UpdateGlobal;
 871            use settings::SettingsStore;
 872            SettingsStore::update_global(cx, |store, cx| {
 873                store.update_user_settings(cx, |settings| {
 874                    settings.project.worktree.file_scan_exclusions = Some(vec![
 875                        "**/.secretdir".to_string(),
 876                        "**/.mymetadata".to_string(),
 877                    ]);
 878                    settings.project.worktree.private_files = Some(
 879                        vec![
 880                            "**/.mysecrets".to_string(),
 881                            "**/*.privatekey".to_string(),
 882                            "**/*.mysensitive".to_string(),
 883                        ]
 884                        .into(),
 885                    );
 886                });
 887            });
 888        });
 889
 890        let project = Project::test(fs.clone(), [path!("/project_root").as_ref()], cx).await;
 891        let action_log = cx.new(|_| ActionLog::new(project.clone()));
 892        let context_server_registry =
 893            cx.new(|cx| ContextServerRegistry::new(project.read(cx).context_server_store(), cx));
 894        let model = Arc::new(FakeLanguageModel::default());
 895        let thread = cx.new(|cx| {
 896            Thread::new(
 897                project.clone(),
 898                cx.new(|_cx| ProjectContext::default()),
 899                context_server_registry,
 900                Templates::new(),
 901                Some(model),
 902                cx,
 903            )
 904        });
 905        let tool = Arc::new(ReadFileTool::new(thread.downgrade(), project, action_log));
 906
 907        // Reading a file outside the project worktree should fail
 908        let result = cx
 909            .update(|cx| {
 910                let input = ReadFileToolInput {
 911                    path: "/outside_project/sensitive_file.txt".to_string(),
 912                    start_line: None,
 913                    end_line: None,
 914                    start_byte: None,
 915                    max_bytes: None,
 916                };
 917                tool.clone().run(input, ToolCallEventStream::test().0, cx)
 918            })
 919            .await;
 920        assert!(
 921            result.is_err(),
 922            "read_file_tool should error when attempting to read an absolute path outside a worktree"
 923        );
 924
 925        // Reading a file within the project should succeed
 926        let result = cx
 927            .update(|cx| {
 928                let input = ReadFileToolInput {
 929                    path: "project_root/allowed_file.txt".to_string(),
 930                    start_line: None,
 931                    end_line: None,
 932                    start_byte: None,
 933                    max_bytes: None,
 934                };
 935                tool.clone().run(input, ToolCallEventStream::test().0, cx)
 936            })
 937            .await;
 938        assert!(
 939            result.is_ok(),
 940            "read_file_tool should be able to read files inside worktrees"
 941        );
 942
 943        // Reading files that match file_scan_exclusions should fail
 944        let result = cx
 945            .update(|cx| {
 946                let input = ReadFileToolInput {
 947                    path: "project_root/.secretdir/config".to_string(),
 948                    start_line: None,
 949                    end_line: None,
 950                    start_byte: None,
 951                    max_bytes: None,
 952                };
 953                tool.clone().run(input, ToolCallEventStream::test().0, cx)
 954            })
 955            .await;
 956        assert!(
 957            result.is_err(),
 958            "read_file_tool should error when attempting to read files in .secretdir (file_scan_exclusions)"
 959        );
 960
 961        let result = cx
 962            .update(|cx| {
 963                let input = ReadFileToolInput {
 964                    path: "project_root/.mymetadata".to_string(),
 965                    start_line: None,
 966                    end_line: None,
 967                    start_byte: None,
 968                    max_bytes: None,
 969                };
 970                tool.clone().run(input, ToolCallEventStream::test().0, cx)
 971            })
 972            .await;
 973        assert!(
 974            result.is_err(),
 975            "read_file_tool should error when attempting to read .mymetadata files (file_scan_exclusions)"
 976        );
 977
 978        // Reading private files should fail
 979        let result = cx
 980            .update(|cx| {
 981                let input = ReadFileToolInput {
 982                    path: "project_root/.mysecrets".to_string(),
 983                    start_line: None,
 984                    end_line: None,
 985                    start_byte: None,
 986                    max_bytes: None,
 987                };
 988                tool.clone().run(input, ToolCallEventStream::test().0, cx)
 989            })
 990            .await;
 991        assert!(
 992            result.is_err(),
 993            "read_file_tool should error when attempting to read .mysecrets (private_files)"
 994        );
 995
 996        let result = cx
 997            .update(|cx| {
 998                let input = ReadFileToolInput {
 999                    path: "project_root/subdir/special.privatekey".to_string(),
1000                    start_line: None,
1001                    end_line: None,
1002                    start_byte: None,
1003                    max_bytes: None,
1004                };
1005                tool.clone().run(input, ToolCallEventStream::test().0, cx)
1006            })
1007            .await;
1008        assert!(
1009            result.is_err(),
1010            "read_file_tool should error when attempting to read .privatekey files (private_files)"
1011        );
1012
1013        let result = cx
1014            .update(|cx| {
1015                let input = ReadFileToolInput {
1016                    path: "project_root/subdir/data.mysensitive".to_string(),
1017                    start_line: None,
1018                    end_line: None,
1019                    start_byte: None,
1020                    max_bytes: None,
1021                };
1022                tool.clone().run(input, ToolCallEventStream::test().0, cx)
1023            })
1024            .await;
1025        assert!(
1026            result.is_err(),
1027            "read_file_tool should error when attempting to read .mysensitive files (private_files)"
1028        );
1029
1030        // Reading a normal file should still work, even with private_files configured
1031        let result = cx
1032            .update(|cx| {
1033                let input = ReadFileToolInput {
1034                    path: "project_root/subdir/normal_file.txt".to_string(),
1035                    start_line: None,
1036                    end_line: None,
1037                    start_byte: None,
1038                    max_bytes: None,
1039                };
1040                tool.clone().run(input, ToolCallEventStream::test().0, cx)
1041            })
1042            .await;
1043        assert!(result.is_ok(), "Should be able to read normal files");
1044        assert_eq!(result.unwrap(), "Normal file content".into());
1045
1046        // Path traversal attempts with .. should fail
1047        let result = cx
1048            .update(|cx| {
1049                let input = ReadFileToolInput {
1050                    path: "project_root/../outside_project/sensitive_file.txt".to_string(),
1051                    start_line: None,
1052                    end_line: None,
1053                    start_byte: None,
1054                    max_bytes: None,
1055                };
1056                tool.run(input, ToolCallEventStream::test().0, cx)
1057            })
1058            .await;
1059        assert!(
1060            result.is_err(),
1061            "read_file_tool should error when attempting to read a relative path that resolves to outside a worktree"
1062        );
1063    }
1064
1065    #[gpui::test]
1066    async fn test_read_file_with_multiple_worktree_settings(cx: &mut TestAppContext) {
1067        init_test(cx);
1068
1069        let fs = FakeFs::new(cx.executor());
1070
1071        // Create first worktree with its own private_files setting
1072        fs.insert_tree(
1073            path!("/worktree1"),
1074            json!({
1075                "src": {
1076                    "main.rs": "fn main() { println!(\"Hello from worktree1\"); }",
1077                    "secret.rs": "const API_KEY: &str = \"secret_key_1\";",
1078                    "config.toml": "[database]\nurl = \"postgres://localhost/db1\""
1079                },
1080                "tests": {
1081                    "test.rs": "mod tests { fn test_it() {} }",
1082                    "fixture.sql": "CREATE TABLE users (id INT, name VARCHAR(255));"
1083                },
1084                ".zed": {
1085                    "settings.json": r#"{
1086                        "file_scan_exclusions": ["**/fixture.*"],
1087                        "private_files": ["**/secret.rs", "**/config.toml"]
1088                    }"#
1089                }
1090            }),
1091        )
1092        .await;
1093
1094        // Create second worktree with different private_files setting
1095        fs.insert_tree(
1096            path!("/worktree2"),
1097            json!({
1098                "lib": {
1099                    "public.js": "export function greet() { return 'Hello from worktree2'; }",
1100                    "private.js": "const SECRET_TOKEN = \"private_token_2\";",
1101                    "data.json": "{\"api_key\": \"json_secret_key\"}"
1102                },
1103                "docs": {
1104                    "README.md": "# Public Documentation",
1105                    "internal.md": "# Internal Secrets and Configuration"
1106                },
1107                ".zed": {
1108                    "settings.json": r#"{
1109                        "file_scan_exclusions": ["**/internal.*"],
1110                        "private_files": ["**/private.js", "**/data.json"]
1111                    }"#
1112                }
1113            }),
1114        )
1115        .await;
1116
1117        // Set global settings
1118        cx.update(|cx| {
1119            SettingsStore::update_global(cx, |store, cx| {
1120                store.update_user_settings(cx, |settings| {
1121                    settings.project.worktree.file_scan_exclusions =
1122                        Some(vec!["**/.git".to_string(), "**/node_modules".to_string()]);
1123                    settings.project.worktree.private_files =
1124                        Some(vec!["**/.env".to_string()].into());
1125                });
1126            });
1127        });
1128
1129        let project = Project::test(
1130            fs.clone(),
1131            [path!("/worktree1").as_ref(), path!("/worktree2").as_ref()],
1132            cx,
1133        )
1134        .await;
1135
1136        let action_log = cx.new(|_| ActionLog::new(project.clone()));
1137        let context_server_registry =
1138            cx.new(|cx| ContextServerRegistry::new(project.read(cx).context_server_store(), cx));
1139        let model = Arc::new(FakeLanguageModel::default());
1140        let thread = cx.new(|cx| {
1141            Thread::new(
1142                project.clone(),
1143                cx.new(|_cx| ProjectContext::default()),
1144                context_server_registry,
1145                Templates::new(),
1146                Some(model),
1147                cx,
1148            )
1149        });
1150        let tool = Arc::new(ReadFileTool::new(
1151            thread.downgrade(),
1152            project.clone(),
1153            action_log.clone(),
1154        ));
1155
1156        // Test reading allowed files in worktree1
1157        let result = cx
1158            .update(|cx| {
1159                let input = ReadFileToolInput {
1160                    path: "worktree1/src/main.rs".to_string(),
1161                    start_line: None,
1162                    end_line: None,
1163                    start_byte: None,
1164                    max_bytes: None,
1165                };
1166                tool.clone().run(input, ToolCallEventStream::test().0, cx)
1167            })
1168            .await
1169            .unwrap();
1170
1171        assert_eq!(
1172            result,
1173            "fn main() { println!(\"Hello from worktree1\"); }".into()
1174        );
1175
1176        // Test reading private file in worktree1 should fail
1177        let result = cx
1178            .update(|cx| {
1179                let input = ReadFileToolInput {
1180                    path: "worktree1/src/secret.rs".to_string(),
1181                    start_line: None,
1182                    end_line: None,
1183                    start_byte: None,
1184                    max_bytes: None,
1185                };
1186                tool.clone().run(input, ToolCallEventStream::test().0, cx)
1187            })
1188            .await;
1189
1190        assert!(result.is_err());
1191        assert!(
1192            result
1193                .unwrap_err()
1194                .to_string()
1195                .contains("worktree `private_files` setting"),
1196            "Error should mention worktree private_files setting"
1197        );
1198
1199        // Test reading excluded file in worktree1 should fail
1200        let result = cx
1201            .update(|cx| {
1202                let input = ReadFileToolInput {
1203                    path: "worktree1/tests/fixture.sql".to_string(),
1204                    start_line: None,
1205                    end_line: None,
1206                    start_byte: None,
1207                    max_bytes: None,
1208                };
1209                tool.clone().run(input, ToolCallEventStream::test().0, cx)
1210            })
1211            .await;
1212
1213        assert!(result.is_err());
1214        assert!(
1215            result
1216                .unwrap_err()
1217                .to_string()
1218                .contains("worktree `file_scan_exclusions` setting"),
1219            "Error should mention worktree file_scan_exclusions setting"
1220        );
1221
1222        // Test reading allowed files in worktree2
1223        let result = cx
1224            .update(|cx| {
1225                let input = ReadFileToolInput {
1226                    path: "worktree2/lib/public.js".to_string(),
1227                    start_line: None,
1228                    end_line: None,
1229                    start_byte: None,
1230                    max_bytes: None,
1231                };
1232                tool.clone().run(input, ToolCallEventStream::test().0, cx)
1233            })
1234            .await
1235            .unwrap();
1236
1237        assert_eq!(
1238            result,
1239            "export function greet() { return 'Hello from worktree2'; }".into()
1240        );
1241
1242        // Test reading private file in worktree2 should fail
1243        let result = cx
1244            .update(|cx| {
1245                let input = ReadFileToolInput {
1246                    path: "worktree2/lib/private.js".to_string(),
1247                    start_line: None,
1248                    end_line: None,
1249                    start_byte: None,
1250                    max_bytes: None,
1251                };
1252                tool.clone().run(input, ToolCallEventStream::test().0, cx)
1253            })
1254            .await;
1255
1256        assert!(result.is_err());
1257        assert!(
1258            result
1259                .unwrap_err()
1260                .to_string()
1261                .contains("worktree `private_files` setting"),
1262            "Error should mention worktree private_files setting"
1263        );
1264
1265        // Test reading excluded file in worktree2 should fail
1266        let result = cx
1267            .update(|cx| {
1268                let input = ReadFileToolInput {
1269                    path: "worktree2/docs/internal.md".to_string(),
1270                    start_line: None,
1271                    end_line: None,
1272                    start_byte: None,
1273                    max_bytes: None,
1274                };
1275                tool.clone().run(input, ToolCallEventStream::test().0, cx)
1276            })
1277            .await;
1278
1279        assert!(result.is_err());
1280        assert!(
1281            result
1282                .unwrap_err()
1283                .to_string()
1284                .contains("worktree `file_scan_exclusions` setting"),
1285            "Error should mention worktree file_scan_exclusions setting"
1286        );
1287
1288        // Test that files allowed in one worktree but not in another are handled correctly
1289        // (e.g., config.toml is private in worktree1 but doesn't exist in worktree2)
1290        let result = cx
1291            .update(|cx| {
1292                let input = ReadFileToolInput {
1293                    path: "worktree1/src/config.toml".to_string(),
1294                    start_line: None,
1295                    end_line: None,
1296                    start_byte: None,
1297                    max_bytes: None,
1298                };
1299                tool.clone().run(input, ToolCallEventStream::test().0, cx)
1300            })
1301            .await;
1302
1303        assert!(result.is_err());
1304        assert!(
1305            result
1306                .unwrap_err()
1307                .to_string()
1308                .contains("worktree `private_files` setting"),
1309            "Config.toml should be blocked by worktree1's private_files setting"
1310        );
1311    }
1312}