edit_file_thread_test.rs

  1use super::*;
  2use acp_thread::UserMessageId;
  3use action_log::ActionLog;
  4use fs::FakeFs;
  5use language_model::{
  6    LanguageModelCompletionEvent, LanguageModelToolUse, MessageContent, StopReason,
  7    fake_provider::FakeLanguageModel,
  8};
  9use prompt_store::ProjectContext;
 10use serde_json::json;
 11use std::{collections::BTreeMap, sync::Arc, time::Duration};
 12use util::path;
 13
 14#[gpui::test]
 15async fn test_edit_file_tool_in_thread_context(cx: &mut TestAppContext) {
 16    // This test verifies that the edit_file tool works correctly when invoked
 17    // through the full thread flow (model sends ToolUse event -> tool runs -> result sent back).
 18    // This is different from tests that call tool.run() directly.
 19    super::init_test(cx);
 20    super::always_allow_tools(cx);
 21
 22    let fs = FakeFs::new(cx.executor());
 23    fs.insert_tree(
 24        path!("/project"),
 25        json!({
 26            "src": {
 27                "main.rs": "fn main() {\n    println!(\"Hello, world!\");\n}\n"
 28            }
 29        }),
 30    )
 31    .await;
 32
 33    let project = project::Project::test(fs.clone(), [path!("/project").as_ref()], cx).await;
 34    let project_context = cx.new(|_cx| ProjectContext::default());
 35    let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
 36    let context_server_registry =
 37        cx.new(|cx| crate::ContextServerRegistry::new(context_server_store.clone(), cx));
 38    let model = Arc::new(FakeLanguageModel::default());
 39    let fake_model = model.as_fake();
 40
 41    let thread = cx.new(|cx| {
 42        let mut thread = crate::Thread::new(
 43            project.clone(),
 44            project_context,
 45            context_server_registry,
 46            crate::Templates::new(),
 47            Some(model.clone()),
 48            cx,
 49        );
 50        // Add just the tools we need for this test
 51        let language_registry = project.read(cx).languages().clone();
 52        thread.add_tool(crate::ReadFileTool::new(
 53            cx.weak_entity(),
 54            project.clone(),
 55            thread.action_log().clone(),
 56        ));
 57        thread.add_tool(crate::EditFileTool::new(
 58            project.clone(),
 59            cx.weak_entity(),
 60            language_registry,
 61            crate::Templates::new(),
 62        ));
 63        thread
 64    });
 65
 66    // First, read the file so the thread knows about its contents
 67    let _events = thread
 68        .update(cx, |thread, cx| {
 69            thread.send(UserMessageId::new(), ["Read the file src/main.rs"], cx)
 70        })
 71        .unwrap();
 72    cx.run_until_parked();
 73
 74    // Model calls read_file tool
 75    let read_tool_use = LanguageModelToolUse {
 76        id: "read_tool_1".into(),
 77        name: "read_file".into(),
 78        raw_input: json!({"path": "project/src/main.rs"}).to_string(),
 79        input: json!({"path": "project/src/main.rs"}),
 80        is_input_complete: true,
 81        thought_signature: None,
 82    };
 83    fake_model
 84        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(read_tool_use));
 85    fake_model
 86        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::ToolUse));
 87    fake_model.end_last_completion_stream();
 88    cx.run_until_parked();
 89
 90    // Wait for the read tool to complete and model to be called again
 91    while fake_model.pending_completions().is_empty() {
 92        cx.run_until_parked();
 93    }
 94
 95    // Model responds after seeing the file content, then calls edit_file
 96    fake_model.send_last_completion_stream_text_chunk("I'll edit the file now.");
 97    let edit_tool_use = LanguageModelToolUse {
 98        id: "edit_tool_1".into(),
 99        name: "edit_file".into(),
100        raw_input: json!({
101            "display_description": "Change greeting message",
102            "path": "project/src/main.rs",
103            "mode": "edit"
104        })
105        .to_string(),
106        input: json!({
107            "display_description": "Change greeting message",
108            "path": "project/src/main.rs",
109            "mode": "edit"
110        }),
111        is_input_complete: true,
112        thought_signature: None,
113    };
114    fake_model
115        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(edit_tool_use));
116    fake_model
117        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::ToolUse));
118    fake_model.end_last_completion_stream();
119    cx.run_until_parked();
120
121    // The edit_file tool creates an EditAgent which makes its own model request.
122    // We need to respond to that request with the edit instructions.
123    // Wait for the edit agent's completion request
124    let deadline = std::time::Instant::now() + Duration::from_secs(5);
125    while fake_model.pending_completions().is_empty() {
126        if std::time::Instant::now() >= deadline {
127            panic!(
128                "Timed out waiting for edit agent completion request. Pending: {}",
129                fake_model.pending_completions().len()
130            );
131        }
132        cx.run_until_parked();
133        cx.background_executor
134            .timer(Duration::from_millis(10))
135            .await;
136    }
137
138    // Send the edit agent's response with the XML format it expects
139    let edit_response = "<old_text>println!(\"Hello, world!\");</old_text>\n<new_text>println!(\"Hello, Zed!\");</new_text>";
140    fake_model.send_last_completion_stream_text_chunk(edit_response);
141    fake_model.end_last_completion_stream();
142    cx.run_until_parked();
143
144    // Wait for the edit to complete and the thread to call the model again with tool results
145    let deadline = std::time::Instant::now() + Duration::from_secs(5);
146    while fake_model.pending_completions().is_empty() {
147        if std::time::Instant::now() >= deadline {
148            panic!("Timed out waiting for model to be called after edit completion");
149        }
150        cx.run_until_parked();
151        cx.background_executor
152            .timer(Duration::from_millis(10))
153            .await;
154    }
155
156    // Verify the file was edited
157    let file_content = fs
158        .load(path!("/project/src/main.rs").as_ref())
159        .await
160        .expect("file should exist");
161    assert!(
162        file_content.contains("Hello, Zed!"),
163        "File should have been edited. Content: {}",
164        file_content
165    );
166    assert!(
167        !file_content.contains("Hello, world!"),
168        "Old content should be replaced. Content: {}",
169        file_content
170    );
171
172    // Verify the tool result was sent back to the model
173    let pending = fake_model.pending_completions();
174    assert!(
175        !pending.is_empty(),
176        "Model should have been called with tool result"
177    );
178
179    let last_request = pending.last().unwrap();
180    let has_tool_result = last_request.messages.iter().any(|m| {
181        m.content
182            .iter()
183            .any(|c| matches!(c, language_model::MessageContent::ToolResult(_)))
184    });
185    assert!(
186        has_tool_result,
187        "Tool result should be in the messages sent back to the model"
188    );
189
190    // Complete the turn
191    fake_model.send_last_completion_stream_text_chunk("I've updated the greeting message.");
192    fake_model
193        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
194    fake_model.end_last_completion_stream();
195    cx.run_until_parked();
196
197    // Verify the thread completed successfully
198    thread.update(cx, |thread, _cx| {
199        assert!(
200            thread.is_turn_complete(),
201            "Thread should be complete after the turn ends"
202        );
203    });
204}
205
206#[gpui::test]
207async fn test_subagent_uses_read_file_tool(cx: &mut TestAppContext) {
208    // This test verifies that subagents can successfully use the read_file tool
209    // through the full thread flow, and that tools are properly rebound to use
210    // the subagent's thread ID instead of the parent's.
211    super::init_test(cx);
212    super::always_allow_tools(cx);
213
214    cx.update(|cx| {
215        cx.update_flags(true, vec!["subagents".to_string()]);
216    });
217
218    let fs = FakeFs::new(cx.executor());
219    fs.insert_tree(
220        path!("/project"),
221        json!({
222            "src": {
223                "lib.rs": "pub fn hello() -> &'static str {\n    \"Hello from lib!\"\n}\n"
224            }
225        }),
226    )
227    .await;
228
229    let project = project::Project::test(fs.clone(), [path!("/project").as_ref()], cx).await;
230    let project_context = cx.new(|_cx| ProjectContext::default());
231    let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
232    let context_server_registry =
233        cx.new(|cx| crate::ContextServerRegistry::new(context_server_store.clone(), cx));
234    let model = Arc::new(FakeLanguageModel::default());
235    let fake_model = model.as_fake();
236
237    // Create subagent context
238    let subagent_context = crate::SubagentContext {
239        parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
240        tool_use_id: language_model::LanguageModelToolUseId::from("subagent-tool-use-id"),
241        depth: 1,
242        summary_prompt: "Summarize what you found".to_string(),
243        context_low_prompt: "Context low".to_string(),
244    };
245
246    // Create parent tools that will be passed to the subagent
247    // This simulates how the subagent_tool passes tools to new_subagent
248    let parent_tools: BTreeMap<gpui::SharedString, std::sync::Arc<dyn crate::AnyAgentTool>> = {
249        let action_log = cx.new(|_| ActionLog::new(project.clone()));
250        // Create a "fake" parent thread reference - this should get rebound
251        let fake_parent_thread = cx.new(|cx| {
252            crate::Thread::new(
253                project.clone(),
254                cx.new(|_cx| ProjectContext::default()),
255                cx.new(|cx| crate::ContextServerRegistry::new(context_server_store.clone(), cx)),
256                crate::Templates::new(),
257                Some(model.clone()),
258                cx,
259            )
260        });
261        let mut tools: BTreeMap<gpui::SharedString, std::sync::Arc<dyn crate::AnyAgentTool>> =
262            BTreeMap::new();
263        tools.insert(
264            "read_file".into(),
265            crate::ReadFileTool::new(fake_parent_thread.downgrade(), project.clone(), action_log)
266                .erase(),
267        );
268        tools
269    };
270
271    // Create subagent - tools should be rebound to use subagent's thread
272    let subagent = cx.new(|cx| {
273        crate::Thread::new_subagent(
274            project.clone(),
275            project_context,
276            context_server_registry,
277            crate::Templates::new(),
278            model.clone(),
279            subagent_context,
280            parent_tools,
281            cx,
282        )
283    });
284
285    // Get the subagent's thread ID
286    let _subagent_thread_id = subagent.read_with(cx, |thread, _| thread.id().to_string());
287
288    // Verify the subagent has the read_file tool
289    subagent.read_with(cx, |thread, _| {
290        assert!(
291            thread.has_registered_tool("read_file"),
292            "subagent should have read_file tool"
293        );
294    });
295
296    // Submit a user message to the subagent
297    subagent
298        .update(cx, |thread, cx| {
299            thread.submit_user_message("Read the file src/lib.rs", cx)
300        })
301        .unwrap();
302    cx.run_until_parked();
303
304    // Simulate the model calling the read_file tool
305    let read_tool_use = LanguageModelToolUse {
306        id: "read_tool_1".into(),
307        name: "read_file".into(),
308        raw_input: json!({"path": "project/src/lib.rs"}).to_string(),
309        input: json!({"path": "project/src/lib.rs"}),
310        is_input_complete: true,
311        thought_signature: None,
312    };
313    fake_model
314        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(read_tool_use));
315    fake_model.end_last_completion_stream();
316    cx.run_until_parked();
317
318    // Wait for the tool to complete and the model to be called again with tool results
319    let deadline = std::time::Instant::now() + Duration::from_secs(5);
320    while fake_model.pending_completions().is_empty() {
321        if std::time::Instant::now() >= deadline {
322            panic!("Timed out waiting for model to be called after read_file tool completion");
323        }
324        cx.run_until_parked();
325        cx.background_executor
326            .timer(Duration::from_millis(10))
327            .await;
328    }
329
330    // Verify the tool result was sent back to the model
331    let pending = fake_model.pending_completions();
332    assert!(
333        !pending.is_empty(),
334        "Model should have been called with tool result"
335    );
336
337    let last_request = pending.last().unwrap();
338    let tool_result = last_request.messages.iter().find_map(|m| {
339        m.content.iter().find_map(|c| match c {
340            MessageContent::ToolResult(result) => Some(result),
341            _ => None,
342        })
343    });
344    assert!(
345        tool_result.is_some(),
346        "Tool result should be in the messages sent back to the model"
347    );
348
349    // Verify the tool result contains the file content
350    let result = tool_result.unwrap();
351    let result_text = match &result.content {
352        language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
353        _ => panic!("expected text content in tool result"),
354    };
355    assert!(
356        result_text.contains("Hello from lib!"),
357        "Tool result should contain file content, got: {}",
358        result_text
359    );
360
361    // Verify the subagent is ready for more input (tool completed, model called again)
362    // This test verifies the subagent can successfully use read_file tool.
363    // The summary flow is tested separately in test_subagent_returns_summary_on_completion.
364}
365
366#[gpui::test]
367async fn test_subagent_uses_edit_file_tool(cx: &mut TestAppContext) {
368    // This test verifies that subagents can successfully use the edit_file tool
369    // through the full thread flow, including the edit agent's model request.
370    // It also verifies that the edit agent uses the subagent's thread ID, not the parent's.
371    super::init_test(cx);
372    super::always_allow_tools(cx);
373
374    cx.update(|cx| {
375        cx.update_flags(true, vec!["subagents".to_string()]);
376    });
377
378    let fs = FakeFs::new(cx.executor());
379    fs.insert_tree(
380        path!("/project"),
381        json!({
382            "src": {
383                "config.rs": "pub const VERSION: &str = \"1.0.0\";\n"
384            }
385        }),
386    )
387    .await;
388
389    let project = project::Project::test(fs.clone(), [path!("/project").as_ref()], cx).await;
390    let project_context = cx.new(|_cx| ProjectContext::default());
391    let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
392    let context_server_registry =
393        cx.new(|cx| crate::ContextServerRegistry::new(context_server_store.clone(), cx));
394    let model = Arc::new(FakeLanguageModel::default());
395    let fake_model = model.as_fake();
396
397    // Create a "parent" thread to simulate the real scenario where tools are inherited
398    let parent_thread = cx.new(|cx| {
399        crate::Thread::new(
400            project.clone(),
401            cx.new(|_cx| ProjectContext::default()),
402            cx.new(|cx| crate::ContextServerRegistry::new(context_server_store.clone(), cx)),
403            crate::Templates::new(),
404            Some(model.clone()),
405            cx,
406        )
407    });
408    let parent_thread_id = parent_thread.read_with(cx, |thread, _| thread.id().to_string());
409
410    // Create parent tools that reference the parent thread
411    let parent_tools: BTreeMap<gpui::SharedString, std::sync::Arc<dyn crate::AnyAgentTool>> = {
412        let action_log = cx.new(|_| ActionLog::new(project.clone()));
413        let language_registry = project.read_with(cx, |p, _| p.languages().clone());
414        let mut tools: BTreeMap<gpui::SharedString, std::sync::Arc<dyn crate::AnyAgentTool>> =
415            BTreeMap::new();
416        tools.insert(
417            "read_file".into(),
418            crate::ReadFileTool::new(parent_thread.downgrade(), project.clone(), action_log)
419                .erase(),
420        );
421        tools.insert(
422            "edit_file".into(),
423            crate::EditFileTool::new(
424                project.clone(),
425                parent_thread.downgrade(),
426                language_registry,
427                crate::Templates::new(),
428            )
429            .erase(),
430        );
431        tools
432    };
433
434    // Create subagent context
435    let subagent_context = crate::SubagentContext {
436        parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
437        tool_use_id: language_model::LanguageModelToolUseId::from("subagent-tool-use-id"),
438        depth: 1,
439        summary_prompt: "Summarize what you changed".to_string(),
440        context_low_prompt: "Context low".to_string(),
441    };
442
443    // Create subagent - tools should be rebound to use subagent's thread
444    let subagent = cx.new(|cx| {
445        crate::Thread::new_subagent(
446            project.clone(),
447            project_context,
448            context_server_registry,
449            crate::Templates::new(),
450            model.clone(),
451            subagent_context,
452            parent_tools,
453            cx,
454        )
455    });
456
457    // Get the subagent's thread ID - it should be different from parent
458    let subagent_thread_id = subagent.read_with(cx, |thread, _| thread.id().to_string());
459    assert_ne!(
460        parent_thread_id, subagent_thread_id,
461        "Subagent should have a different thread ID than parent"
462    );
463
464    // Verify the subagent has the tools
465    subagent.read_with(cx, |thread, _| {
466        assert!(
467            thread.has_registered_tool("read_file"),
468            "subagent should have read_file tool"
469        );
470        assert!(
471            thread.has_registered_tool("edit_file"),
472            "subagent should have edit_file tool"
473        );
474    });
475
476    // Submit a user message to the subagent
477    subagent
478        .update(cx, |thread, cx| {
479            thread.submit_user_message("Update the version in config.rs to 2.0.0", cx)
480        })
481        .unwrap();
482    cx.run_until_parked();
483
484    // First, model calls read_file to see the current content
485    let read_tool_use = LanguageModelToolUse {
486        id: "read_tool_1".into(),
487        name: "read_file".into(),
488        raw_input: json!({"path": "project/src/config.rs"}).to_string(),
489        input: json!({"path": "project/src/config.rs"}),
490        is_input_complete: true,
491        thought_signature: None,
492    };
493    fake_model
494        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(read_tool_use));
495    fake_model.end_last_completion_stream();
496    cx.run_until_parked();
497
498    // Wait for the read tool to complete and model to be called again
499    let deadline = std::time::Instant::now() + Duration::from_secs(5);
500    while fake_model.pending_completions().is_empty() {
501        if std::time::Instant::now() >= deadline {
502            panic!("Timed out waiting for model to be called after read_file tool");
503        }
504        cx.run_until_parked();
505        cx.background_executor
506            .timer(Duration::from_millis(10))
507            .await;
508    }
509
510    // Model responds and calls edit_file
511    fake_model.send_last_completion_stream_text_chunk("I'll update the version now.");
512    let edit_tool_use = LanguageModelToolUse {
513        id: "edit_tool_1".into(),
514        name: "edit_file".into(),
515        raw_input: json!({
516            "display_description": "Update version to 2.0.0",
517            "path": "project/src/config.rs",
518            "mode": "edit"
519        })
520        .to_string(),
521        input: json!({
522            "display_description": "Update version to 2.0.0",
523            "path": "project/src/config.rs",
524            "mode": "edit"
525        }),
526        is_input_complete: true,
527        thought_signature: None,
528    };
529    fake_model
530        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(edit_tool_use));
531    fake_model.end_last_completion_stream();
532    cx.run_until_parked();
533
534    // The edit_file tool creates an EditAgent which makes its own model request.
535    // Wait for that request.
536    let deadline = std::time::Instant::now() + Duration::from_secs(5);
537    while fake_model.pending_completions().is_empty() {
538        if std::time::Instant::now() >= deadline {
539            panic!(
540                "Timed out waiting for edit agent completion request in subagent. Pending: {}",
541                fake_model.pending_completions().len()
542            );
543        }
544        cx.run_until_parked();
545        cx.background_executor
546            .timer(Duration::from_millis(10))
547            .await;
548    }
549
550    // Verify the edit agent's request uses the SUBAGENT's thread ID, not the parent's
551    let pending = fake_model.pending_completions();
552    let edit_agent_request = pending.last().unwrap();
553    let edit_agent_thread_id = edit_agent_request.thread_id.as_ref().unwrap();
554    std::assert_eq!(
555        edit_agent_thread_id,
556        &subagent_thread_id,
557        "Edit agent should use subagent's thread ID, not parent's. Got: {}, expected: {}",
558        edit_agent_thread_id,
559        subagent_thread_id
560    );
561    std::assert_ne!(
562        edit_agent_thread_id,
563        &parent_thread_id,
564        "Edit agent should NOT use parent's thread ID"
565    );
566
567    // Send the edit agent's response with the XML format it expects
568    let edit_response = "<old_text>pub const VERSION: &str = \"1.0.0\";</old_text>\n<new_text>pub const VERSION: &str = \"2.0.0\";</new_text>";
569    fake_model.send_last_completion_stream_text_chunk(edit_response);
570    fake_model.end_last_completion_stream();
571    cx.run_until_parked();
572
573    // Wait for the edit to complete and the thread to call the model again with tool results
574    let deadline = std::time::Instant::now() + Duration::from_secs(5);
575    while fake_model.pending_completions().is_empty() {
576        if std::time::Instant::now() >= deadline {
577            panic!("Timed out waiting for model to be called after edit completion in subagent");
578        }
579        cx.run_until_parked();
580        cx.background_executor
581            .timer(Duration::from_millis(10))
582            .await;
583    }
584
585    // Verify the file was edited
586    let file_content = fs
587        .load(path!("/project/src/config.rs").as_ref())
588        .await
589        .expect("file should exist");
590    assert!(
591        file_content.contains("2.0.0"),
592        "File should have been edited to contain new version. Content: {}",
593        file_content
594    );
595    assert!(
596        !file_content.contains("1.0.0"),
597        "Old version should be replaced. Content: {}",
598        file_content
599    );
600
601    // Verify the tool result was sent back to the model
602    let pending = fake_model.pending_completions();
603    assert!(
604        !pending.is_empty(),
605        "Model should have been called with tool result"
606    );
607
608    let last_request = pending.last().unwrap();
609    let has_tool_result = last_request.messages.iter().any(|m| {
610        m.content
611            .iter()
612            .any(|c| matches!(c, MessageContent::ToolResult(_)))
613    });
614    assert!(
615        has_tool_result,
616        "Tool result should be in the messages sent back to the model"
617    );
618}