1use super::*;
2use acp_thread::{AgentConnection, AgentModelGroupName, AgentModelList, UserMessageId};
3use agent_client_protocol::{self as acp};
4use agent_settings::AgentProfileId;
5use anyhow::Result;
6use client::{Client, UserStore};
7use cloud_llm_client::CompletionIntent;
8use collections::IndexMap;
9use context_server::{ContextServer, ContextServerCommand, ContextServerId};
10use feature_flags::FeatureFlagAppExt as _;
11use fs::{FakeFs, Fs};
12use futures::{
13 FutureExt as _, StreamExt,
14 channel::{
15 mpsc::{self, UnboundedReceiver},
16 oneshot,
17 },
18 future::{Fuse, Shared},
19};
20use gpui::{
21 App, AppContext, AsyncApp, Entity, Task, TestAppContext, UpdateGlobal,
22 http_client::FakeHttpClient,
23};
24use indoc::indoc;
25
26use language_model::{
27 LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent, LanguageModelId,
28 LanguageModelProviderName, LanguageModelRegistry, LanguageModelRequest,
29 LanguageModelRequestMessage, LanguageModelToolResult, LanguageModelToolSchemaFormat,
30 LanguageModelToolUse, MessageContent, Role, StopReason, fake_provider::FakeLanguageModel,
31};
32use pretty_assertions::assert_eq;
33use project::{
34 Project, context_server_store::ContextServerStore, project_settings::ProjectSettings,
35};
36use prompt_store::ProjectContext;
37use reqwest_client::ReqwestClient;
38use schemars::JsonSchema;
39use serde::{Deserialize, Serialize};
40use serde_json::json;
41use settings::{Settings, SettingsStore};
42use std::{
43 path::Path,
44 pin::Pin,
45 rc::Rc,
46 sync::{
47 Arc,
48 atomic::{AtomicBool, Ordering},
49 },
50 time::Duration,
51};
52use util::path;
53
54mod test_tools;
55use test_tools::*;
56
57fn init_test(cx: &mut TestAppContext) {
58 cx.update(|cx| {
59 let settings_store = SettingsStore::test(cx);
60 cx.set_global(settings_store);
61 });
62}
63
64struct FakeTerminalHandle {
65 killed: Arc<AtomicBool>,
66 stopped_by_user: Arc<AtomicBool>,
67 exit_sender: std::cell::RefCell<Option<futures::channel::oneshot::Sender<()>>>,
68 wait_for_exit: Shared<Task<acp::TerminalExitStatus>>,
69 output: acp::TerminalOutputResponse,
70 id: acp::TerminalId,
71}
72
73impl FakeTerminalHandle {
74 fn new_never_exits(cx: &mut App) -> Self {
75 let killed = Arc::new(AtomicBool::new(false));
76 let stopped_by_user = Arc::new(AtomicBool::new(false));
77
78 let (exit_sender, exit_receiver) = futures::channel::oneshot::channel();
79
80 let wait_for_exit = cx
81 .spawn(async move |_cx| {
82 // Wait for the exit signal (sent when kill() is called)
83 let _ = exit_receiver.await;
84 acp::TerminalExitStatus::new()
85 })
86 .shared();
87
88 Self {
89 killed,
90 stopped_by_user,
91 exit_sender: std::cell::RefCell::new(Some(exit_sender)),
92 wait_for_exit,
93 output: acp::TerminalOutputResponse::new("partial output".to_string(), false),
94 id: acp::TerminalId::new("fake_terminal".to_string()),
95 }
96 }
97
98 fn new_with_immediate_exit(cx: &mut App, exit_code: u32) -> Self {
99 let killed = Arc::new(AtomicBool::new(false));
100 let stopped_by_user = Arc::new(AtomicBool::new(false));
101 let (exit_sender, _exit_receiver) = futures::channel::oneshot::channel();
102
103 let wait_for_exit = cx
104 .spawn(async move |_cx| acp::TerminalExitStatus::new().exit_code(exit_code))
105 .shared();
106
107 Self {
108 killed,
109 stopped_by_user,
110 exit_sender: std::cell::RefCell::new(Some(exit_sender)),
111 wait_for_exit,
112 output: acp::TerminalOutputResponse::new("command output".to_string(), false),
113 id: acp::TerminalId::new("fake_terminal".to_string()),
114 }
115 }
116
117 fn was_killed(&self) -> bool {
118 self.killed.load(Ordering::SeqCst)
119 }
120
121 fn set_stopped_by_user(&self, stopped: bool) {
122 self.stopped_by_user.store(stopped, Ordering::SeqCst);
123 }
124
125 fn signal_exit(&self) {
126 if let Some(sender) = self.exit_sender.borrow_mut().take() {
127 let _ = sender.send(());
128 }
129 }
130}
131
132impl crate::TerminalHandle for FakeTerminalHandle {
133 fn id(&self, _cx: &AsyncApp) -> Result<acp::TerminalId> {
134 Ok(self.id.clone())
135 }
136
137 fn current_output(&self, _cx: &AsyncApp) -> Result<acp::TerminalOutputResponse> {
138 Ok(self.output.clone())
139 }
140
141 fn wait_for_exit(&self, _cx: &AsyncApp) -> Result<Shared<Task<acp::TerminalExitStatus>>> {
142 Ok(self.wait_for_exit.clone())
143 }
144
145 fn kill(&self, _cx: &AsyncApp) -> Result<()> {
146 self.killed.store(true, Ordering::SeqCst);
147 self.signal_exit();
148 Ok(())
149 }
150
151 fn was_stopped_by_user(&self, _cx: &AsyncApp) -> Result<bool> {
152 Ok(self.stopped_by_user.load(Ordering::SeqCst))
153 }
154}
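
// A minimal sketch (not exercised by the suite; the helper name is illustrative only) of
// the contract the fake relies on: `kill` flips the `killed` flag and fires the exit
// signal, which is exactly what the timeout and cancellation tests below observe.
#[allow(dead_code)]
fn example_fake_terminal_kill_contract(cx: &mut App) {
    let handle = FakeTerminalHandle::new_never_exits(cx);
    assert!(!handle.was_killed(), "freshly created handles start unkilled");

    // Mirror what `kill` does internally: mark the handle as killed and fire the oneshot
    // so the shared `wait_for_exit` task can resolve on a later executor turn.
    handle.killed.store(true, Ordering::SeqCst);
    handle.signal_exit();
    assert!(handle.was_killed());
}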
155
156struct FakeThreadEnvironment {
157 handle: Rc<FakeTerminalHandle>,
158}
159
160impl crate::ThreadEnvironment for FakeThreadEnvironment {
161 fn create_terminal(
162 &self,
163 _command: String,
164 _cwd: Option<std::path::PathBuf>,
165 _output_byte_limit: Option<u64>,
166 _cx: &mut AsyncApp,
167 ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
168 Task::ready(Ok(self.handle.clone() as Rc<dyn crate::TerminalHandle>))
169 }
170}
171
172/// Environment that creates multiple independent terminal handles for testing concurrent terminals.
173struct MultiTerminalEnvironment {
174 handles: std::cell::RefCell<Vec<Rc<FakeTerminalHandle>>>,
175}
176
177impl MultiTerminalEnvironment {
178 fn new() -> Self {
179 Self {
180 handles: std::cell::RefCell::new(Vec::new()),
181 }
182 }
183
184 fn handles(&self) -> Vec<Rc<FakeTerminalHandle>> {
185 self.handles.borrow().clone()
186 }
187}
188
189impl crate::ThreadEnvironment for MultiTerminalEnvironment {
190 fn create_terminal(
191 &self,
192 _command: String,
193 _cwd: Option<std::path::PathBuf>,
194 _output_byte_limit: Option<u64>,
195 cx: &mut AsyncApp,
196 ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
197 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
198 self.handles.borrow_mut().push(handle.clone());
199 Task::ready(Ok(handle as Rc<dyn crate::TerminalHandle>))
200 }
201}
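
// Hedged usage sketch (illustrative only; not called by these tests): every
// `create_terminal` call on `MultiTerminalEnvironment` mints a fresh fake handle and
// records it, so a test that spawns several terminals can inspect each one via `handles()`.
#[allow(dead_code)]
fn example_multi_terminal_environment(cx: &mut AsyncApp) {
    let environment = MultiTerminalEnvironment::new();
    // Fully qualified calls so the sketch does not depend on the trait being in scope.
    let _first =
        crate::ThreadEnvironment::create_terminal(&environment, "echo one".into(), None, None, cx);
    let _second =
        crate::ThreadEnvironment::create_terminal(&environment, "echo two".into(), None, None, cx);
    assert_eq!(environment.handles().len(), 2);
}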
202
203fn always_allow_tools(cx: &mut TestAppContext) {
204 cx.update(|cx| {
205 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
206 settings.always_allow_tool_actions = true;
207 agent_settings::AgentSettings::override_global(settings, cx);
208 });
209}
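
// Hedged mirror of the helper above (not used by these tests; the name is illustrative
// only): flip the setting back off when a test needs the permission prompt path instead
// of auto-approval.
#[allow(dead_code)]
fn require_tool_confirmation(cx: &mut TestAppContext) {
    cx.update(|cx| {
        let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
        settings.always_allow_tool_actions = false;
        agent_settings::AgentSettings::override_global(settings, cx);
    });
}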
210
211#[gpui::test]
212async fn test_echo(cx: &mut TestAppContext) {
213 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
214 let fake_model = model.as_fake();
215
216 let events = thread
217 .update(cx, |thread, cx| {
218 thread.send(UserMessageId::new(), ["Testing: Reply with 'Hello'"], cx)
219 })
220 .unwrap();
221 cx.run_until_parked();
222 fake_model.send_last_completion_stream_text_chunk("Hello");
223 fake_model
224 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
225 fake_model.end_last_completion_stream();
226
227 let events = events.collect().await;
228 thread.update(cx, |thread, _cx| {
229 assert_eq!(
230 thread.last_message().unwrap().to_markdown(),
231 indoc! {"
232 ## Assistant
233
234 Hello
235 "}
236 )
237 });
238 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
239}
240
241#[gpui::test]
242async fn test_terminal_tool_timeout_kills_handle(cx: &mut TestAppContext) {
243 init_test(cx);
244 always_allow_tools(cx);
245
246 let fs = FakeFs::new(cx.executor());
247 let project = Project::test(fs, [], cx).await;
248
249 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
250 let environment = Rc::new(FakeThreadEnvironment {
251 handle: handle.clone(),
252 });
253
254 #[allow(clippy::arc_with_non_send_sync)]
255 let tool = Arc::new(crate::TerminalTool::new(project, environment));
256 let (event_stream, mut rx) = crate::ToolCallEventStream::test();
257
258 let task = cx.update(|cx| {
259 tool.run(
260 crate::TerminalToolInput {
261 command: "sleep 1000".to_string(),
262 cd: ".".to_string(),
263 timeout_ms: Some(5),
264 },
265 event_stream,
266 cx,
267 )
268 });
269
270 let update = rx.expect_update_fields().await;
271 assert!(
272 update.content.iter().any(|blocks| {
273 blocks
274 .iter()
275 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
276 }),
277 "expected tool call update to include terminal content"
278 );
279
280 let mut task_future: Pin<Box<Fuse<Task<Result<String>>>>> = Box::pin(task.fuse());
281
282 let deadline = std::time::Instant::now() + Duration::from_millis(500);
283 loop {
284 if let Some(result) = task_future.as_mut().now_or_never() {
285 let result = result.expect("terminal tool task should complete");
286
287 assert!(
288 handle.was_killed(),
289 "expected terminal handle to be killed on timeout"
290 );
291 assert!(
292 result.contains("partial output"),
293 "expected result to include terminal output, got: {result}"
294 );
295 return;
296 }
297
298 if std::time::Instant::now() >= deadline {
299 panic!("timed out waiting for terminal tool task to complete");
300 }
301
302 cx.run_until_parked();
303 cx.background_executor.timer(Duration::from_millis(1)).await;
304 }
305}
306
307#[gpui::test]
308#[ignore]
309async fn test_terminal_tool_without_timeout_does_not_kill_handle(cx: &mut TestAppContext) {
310 init_test(cx);
311 always_allow_tools(cx);
312
313 let fs = FakeFs::new(cx.executor());
314 let project = Project::test(fs, [], cx).await;
315
316 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
317 let environment = Rc::new(FakeThreadEnvironment {
318 handle: handle.clone(),
319 });
320
321 #[allow(clippy::arc_with_non_send_sync)]
322 let tool = Arc::new(crate::TerminalTool::new(project, environment));
323 let (event_stream, mut rx) = crate::ToolCallEventStream::test();
324
325 let _task = cx.update(|cx| {
326 tool.run(
327 crate::TerminalToolInput {
328 command: "sleep 1000".to_string(),
329 cd: ".".to_string(),
330 timeout_ms: None,
331 },
332 event_stream,
333 cx,
334 )
335 });
336
337 let update = rx.expect_update_fields().await;
338 assert!(
339 update.content.iter().any(|blocks| {
340 blocks
341 .iter()
342 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
343 }),
344 "expected tool call update to include terminal content"
345 );
346
347 cx.background_executor
348 .timer(Duration::from_millis(25))
349 .await;
350
351 assert!(
352 !handle.was_killed(),
353 "did not expect terminal handle to be killed without a timeout"
354 );
355}
356
357#[gpui::test]
358async fn test_thinking(cx: &mut TestAppContext) {
359 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
360 let fake_model = model.as_fake();
361
362 let events = thread
363 .update(cx, |thread, cx| {
364 thread.send(
365 UserMessageId::new(),
366 [indoc! {"
367 Testing:
368
369 Generate a thinking step where you just think the word 'Think',
370 and have your final answer be 'Hello'
371 "}],
372 cx,
373 )
374 })
375 .unwrap();
376 cx.run_until_parked();
377 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Thinking {
378 text: "Think".to_string(),
379 signature: None,
380 });
381 fake_model.send_last_completion_stream_text_chunk("Hello");
382 fake_model
383 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
384 fake_model.end_last_completion_stream();
385
386 let events = events.collect().await;
387 thread.update(cx, |thread, _cx| {
388 assert_eq!(
389 thread.last_message().unwrap().to_markdown(),
390 indoc! {"
391 ## Assistant
392
393 <think>Think</think>
394 Hello
395 "}
396 )
397 });
398 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
399}
400
401#[gpui::test]
402async fn test_system_prompt(cx: &mut TestAppContext) {
403 let ThreadTest {
404 model,
405 thread,
406 project_context,
407 ..
408 } = setup(cx, TestModel::Fake).await;
409 let fake_model = model.as_fake();
410
411 project_context.update(cx, |project_context, _cx| {
412 project_context.shell = "test-shell".into()
413 });
414 thread.update(cx, |thread, _| thread.add_tool(EchoTool));
415 thread
416 .update(cx, |thread, cx| {
417 thread.send(UserMessageId::new(), ["abc"], cx)
418 })
419 .unwrap();
420 cx.run_until_parked();
421 let mut pending_completions = fake_model.pending_completions();
422 assert_eq!(
423 pending_completions.len(),
424 1,
425 "unexpected pending completions: {:?}",
426 pending_completions
427 );
428
429 let pending_completion = pending_completions.pop().unwrap();
430 assert_eq!(pending_completion.messages[0].role, Role::System);
431
432 let system_message = &pending_completion.messages[0];
433 let system_prompt = system_message.content[0].to_str().unwrap();
434 assert!(
435 system_prompt.contains("test-shell"),
436 "unexpected system message: {:?}",
437 system_message
438 );
439 assert!(
440 system_prompt.contains("## Fixing Diagnostics"),
441 "unexpected system message: {:?}",
442 system_message
443 );
444}
445
446#[gpui::test]
447async fn test_system_prompt_without_tools(cx: &mut TestAppContext) {
448 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
449 let fake_model = model.as_fake();
450
451 thread
452 .update(cx, |thread, cx| {
453 thread.send(UserMessageId::new(), ["abc"], cx)
454 })
455 .unwrap();
456 cx.run_until_parked();
457 let mut pending_completions = fake_model.pending_completions();
458 assert_eq!(
459 pending_completions.len(),
460 1,
461 "unexpected pending completions: {:?}",
462 pending_completions
463 );
464
465 let pending_completion = pending_completions.pop().unwrap();
466 assert_eq!(pending_completion.messages[0].role, Role::System);
467
468 let system_message = &pending_completion.messages[0];
469 let system_prompt = system_message.content[0].to_str().unwrap();
470 assert!(
471 !system_prompt.contains("## Tool Use"),
472 "unexpected system message: {:?}",
473 system_message
474 );
475 assert!(
476 !system_prompt.contains("## Fixing Diagnostics"),
477 "unexpected system message: {:?}",
478 system_message
479 );
480}
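
// Hedged helper sketch (illustrative only; not part of the real suite): both system-prompt
// tests above extract the prompt text the same way, since the first request message is the
// system message and its first content item holds the prompt.
#[allow(dead_code)]
fn system_prompt_text(completion: &LanguageModelRequest) -> String {
    let system_message = &completion.messages[0];
    assert_eq!(system_message.role, Role::System);
    system_message.content[0].to_str().unwrap().to_string()
}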
481
482#[gpui::test]
483async fn test_prompt_caching(cx: &mut TestAppContext) {
484 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
485 let fake_model = model.as_fake();
486
487 // Send initial user message and verify it's cached
488 thread
489 .update(cx, |thread, cx| {
490 thread.send(UserMessageId::new(), ["Message 1"], cx)
491 })
492 .unwrap();
493 cx.run_until_parked();
494
495 let completion = fake_model.pending_completions().pop().unwrap();
496 assert_eq!(
497 completion.messages[1..],
498 vec![LanguageModelRequestMessage {
499 role: Role::User,
500 content: vec!["Message 1".into()],
501 cache: true,
502 reasoning_details: None,
503 }]
504 );
505 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
506 "Response to Message 1".into(),
507 ));
508 fake_model.end_last_completion_stream();
509 cx.run_until_parked();
510
511 // Send another user message and verify only the latest is cached
512 thread
513 .update(cx, |thread, cx| {
514 thread.send(UserMessageId::new(), ["Message 2"], cx)
515 })
516 .unwrap();
517 cx.run_until_parked();
518
519 let completion = fake_model.pending_completions().pop().unwrap();
520 assert_eq!(
521 completion.messages[1..],
522 vec![
523 LanguageModelRequestMessage {
524 role: Role::User,
525 content: vec!["Message 1".into()],
526 cache: false,
527 reasoning_details: None,
528 },
529 LanguageModelRequestMessage {
530 role: Role::Assistant,
531 content: vec!["Response to Message 1".into()],
532 cache: false,
533 reasoning_details: None,
534 },
535 LanguageModelRequestMessage {
536 role: Role::User,
537 content: vec!["Message 2".into()],
538 cache: true,
539 reasoning_details: None,
540 }
541 ]
542 );
543 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
544 "Response to Message 2".into(),
545 ));
546 fake_model.end_last_completion_stream();
547 cx.run_until_parked();
548
549 // Simulate a tool call and verify that the latest tool result is cached
550 thread.update(cx, |thread, _| thread.add_tool(EchoTool));
551 thread
552 .update(cx, |thread, cx| {
553 thread.send(UserMessageId::new(), ["Use the echo tool"], cx)
554 })
555 .unwrap();
556 cx.run_until_parked();
557
558 let tool_use = LanguageModelToolUse {
559 id: "tool_1".into(),
560 name: EchoTool::name().into(),
561 raw_input: json!({"text": "test"}).to_string(),
562 input: json!({"text": "test"}),
563 is_input_complete: true,
564 thought_signature: None,
565 };
566 fake_model
567 .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
568 fake_model.end_last_completion_stream();
569 cx.run_until_parked();
570
571 let completion = fake_model.pending_completions().pop().unwrap();
572 let tool_result = LanguageModelToolResult {
573 tool_use_id: "tool_1".into(),
574 tool_name: EchoTool::name().into(),
575 is_error: false,
576 content: "test".into(),
577 output: Some("test".into()),
578 };
579 assert_eq!(
580 completion.messages[1..],
581 vec![
582 LanguageModelRequestMessage {
583 role: Role::User,
584 content: vec!["Message 1".into()],
585 cache: false,
586 reasoning_details: None,
587 },
588 LanguageModelRequestMessage {
589 role: Role::Assistant,
590 content: vec!["Response to Message 1".into()],
591 cache: false,
592 reasoning_details: None,
593 },
594 LanguageModelRequestMessage {
595 role: Role::User,
596 content: vec!["Message 2".into()],
597 cache: false,
598 reasoning_details: None,
599 },
600 LanguageModelRequestMessage {
601 role: Role::Assistant,
602 content: vec!["Response to Message 2".into()],
603 cache: false,
604 reasoning_details: None,
605 },
606 LanguageModelRequestMessage {
607 role: Role::User,
608 content: vec!["Use the echo tool".into()],
609 cache: false,
610 reasoning_details: None,
611 },
612 LanguageModelRequestMessage {
613 role: Role::Assistant,
614 content: vec![MessageContent::ToolUse(tool_use)],
615 cache: false,
616 reasoning_details: None,
617 },
618 LanguageModelRequestMessage {
619 role: Role::User,
620 content: vec![MessageContent::ToolResult(tool_result)],
621 cache: true,
622 reasoning_details: None,
623 }
624 ]
625 );
626}
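
// Hedged helper sketch (illustrative only): the caching invariant exercised above is that,
// ignoring the system prompt, the final message of each request is the only one marked
// `cache: true`.
#[allow(dead_code)]
fn assert_only_last_message_cached(messages: &[LanguageModelRequestMessage]) {
    let (last, rest) = messages.split_last().expect("expected at least one message");
    assert!(last.cache, "the final message should be cached");
    assert!(
        rest.iter().all(|message| !message.cache),
        "earlier messages should not be cached"
    );
}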
627
628#[gpui::test]
629#[cfg_attr(not(feature = "e2e"), ignore)]
630async fn test_basic_tool_calls(cx: &mut TestAppContext) {
631 let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;
632
633 // Test a tool call that's likely to complete *before* streaming stops.
634 let events = thread
635 .update(cx, |thread, cx| {
636 thread.add_tool(EchoTool);
637 thread.send(
638 UserMessageId::new(),
639 ["Now test the echo tool with 'Hello'. Does it work? Say 'Yes' or 'No'."],
640 cx,
641 )
642 })
643 .unwrap()
644 .collect()
645 .await;
646 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
647
    // Test a tool call that's likely to complete *after* streaming stops.
649 let events = thread
650 .update(cx, |thread, cx| {
651 thread.remove_tool(&EchoTool::name());
652 thread.add_tool(DelayTool);
653 thread.send(
654 UserMessageId::new(),
655 [
656 "Now call the delay tool with 200ms.",
657 "When the timer goes off, then you echo the output of the tool.",
658 ],
659 cx,
660 )
661 })
662 .unwrap()
663 .collect()
664 .await;
665 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
666 thread.update(cx, |thread, _cx| {
667 assert!(
668 thread
669 .last_message()
670 .unwrap()
671 .as_agent_message()
672 .unwrap()
673 .content
674 .iter()
675 .any(|content| {
676 if let AgentMessageContent::Text(text) = content {
677 text.contains("Ding")
678 } else {
679 false
680 }
681 }),
682 "{}",
683 thread.to_markdown()
684 );
685 });
686}
687
688#[gpui::test]
689#[cfg_attr(not(feature = "e2e"), ignore)]
690async fn test_streaming_tool_calls(cx: &mut TestAppContext) {
691 let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;
692
    // Test a tool call whose input streams in over multiple chunks, so we can observe partial tool use.
694 let mut events = thread
695 .update(cx, |thread, cx| {
696 thread.add_tool(WordListTool);
697 thread.send(UserMessageId::new(), ["Test the word_list tool."], cx)
698 })
699 .unwrap();
700
701 let mut saw_partial_tool_use = false;
702 while let Some(event) = events.next().await {
703 if let Ok(ThreadEvent::ToolCall(tool_call)) = event {
704 thread.update(cx, |thread, _cx| {
705 // Look for a tool use in the thread's last message
706 let message = thread.last_message().unwrap();
707 let agent_message = message.as_agent_message().unwrap();
708 let last_content = agent_message.content.last().unwrap();
709 if let AgentMessageContent::ToolUse(last_tool_use) = last_content {
710 assert_eq!(last_tool_use.name.as_ref(), "word_list");
711 if tool_call.status == acp::ToolCallStatus::Pending {
712 if !last_tool_use.is_input_complete
713 && last_tool_use.input.get("g").is_none()
714 {
715 saw_partial_tool_use = true;
716 }
717 } else {
718 last_tool_use
719 .input
720 .get("a")
721 .expect("'a' has streamed because input is now complete");
722 last_tool_use
723 .input
724 .get("g")
725 .expect("'g' has streamed because input is now complete");
726 }
727 } else {
728 panic!("last content should be a tool use");
729 }
730 });
731 }
732 }
733
734 assert!(
735 saw_partial_tool_use,
736 "should see at least one partially streamed tool use in the history"
737 );
738}
739
740#[gpui::test]
741async fn test_tool_authorization(cx: &mut TestAppContext) {
742 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
743 let fake_model = model.as_fake();
744
745 let mut events = thread
746 .update(cx, |thread, cx| {
747 thread.add_tool(ToolRequiringPermission);
748 thread.send(UserMessageId::new(), ["abc"], cx)
749 })
750 .unwrap();
751 cx.run_until_parked();
752 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
753 LanguageModelToolUse {
754 id: "tool_id_1".into(),
755 name: ToolRequiringPermission::name().into(),
756 raw_input: "{}".into(),
757 input: json!({}),
758 is_input_complete: true,
759 thought_signature: None,
760 },
761 ));
762 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
763 LanguageModelToolUse {
764 id: "tool_id_2".into(),
765 name: ToolRequiringPermission::name().into(),
766 raw_input: "{}".into(),
767 input: json!({}),
768 is_input_complete: true,
769 thought_signature: None,
770 },
771 ));
772 fake_model.end_last_completion_stream();
773 let tool_call_auth_1 = next_tool_call_authorization(&mut events).await;
774 let tool_call_auth_2 = next_tool_call_authorization(&mut events).await;
775
776 // Approve the first
777 tool_call_auth_1
778 .response
779 .send(tool_call_auth_1.options[1].option_id.clone())
780 .unwrap();
781 cx.run_until_parked();
782
783 // Reject the second
784 tool_call_auth_2
785 .response
        .send(tool_call_auth_2.options[2].option_id.clone())
787 .unwrap();
788 cx.run_until_parked();
789
790 let completion = fake_model.pending_completions().pop().unwrap();
791 let message = completion.messages.last().unwrap();
792 assert_eq!(
793 message.content,
794 vec![
795 language_model::MessageContent::ToolResult(LanguageModelToolResult {
796 tool_use_id: tool_call_auth_1.tool_call.tool_call_id.0.to_string().into(),
797 tool_name: ToolRequiringPermission::name().into(),
798 is_error: false,
799 content: "Allowed".into(),
800 output: Some("Allowed".into())
801 }),
802 language_model::MessageContent::ToolResult(LanguageModelToolResult {
803 tool_use_id: tool_call_auth_2.tool_call.tool_call_id.0.to_string().into(),
804 tool_name: ToolRequiringPermission::name().into(),
805 is_error: true,
806 content: "Permission to run tool denied by user".into(),
807 output: Some("Permission to run tool denied by user".into())
808 })
809 ]
810 );
811
812 // Simulate yet another tool call.
813 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
814 LanguageModelToolUse {
815 id: "tool_id_3".into(),
816 name: ToolRequiringPermission::name().into(),
817 raw_input: "{}".into(),
818 input: json!({}),
819 is_input_complete: true,
820 thought_signature: None,
821 },
822 ));
823 fake_model.end_last_completion_stream();
824
825 // Respond by always allowing tools.
826 let tool_call_auth_3 = next_tool_call_authorization(&mut events).await;
827 tool_call_auth_3
828 .response
829 .send(tool_call_auth_3.options[0].option_id.clone())
830 .unwrap();
831 cx.run_until_parked();
832 let completion = fake_model.pending_completions().pop().unwrap();
833 let message = completion.messages.last().unwrap();
834 assert_eq!(
835 message.content,
836 vec![language_model::MessageContent::ToolResult(
837 LanguageModelToolResult {
838 tool_use_id: tool_call_auth_3.tool_call.tool_call_id.0.to_string().into(),
839 tool_name: ToolRequiringPermission::name().into(),
840 is_error: false,
841 content: "Allowed".into(),
842 output: Some("Allowed".into())
843 }
844 )]
845 );
846
847 // Simulate a final tool call, ensuring we don't trigger authorization.
848 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
849 LanguageModelToolUse {
850 id: "tool_id_4".into(),
851 name: ToolRequiringPermission::name().into(),
852 raw_input: "{}".into(),
853 input: json!({}),
854 is_input_complete: true,
855 thought_signature: None,
856 },
857 ));
858 fake_model.end_last_completion_stream();
859 cx.run_until_parked();
860 let completion = fake_model.pending_completions().pop().unwrap();
861 let message = completion.messages.last().unwrap();
862 assert_eq!(
863 message.content,
864 vec![language_model::MessageContent::ToolResult(
865 LanguageModelToolResult {
866 tool_use_id: "tool_id_4".into(),
867 tool_name: ToolRequiringPermission::name().into(),
868 is_error: false,
869 content: "Allowed".into(),
870 output: Some("Allowed".into())
871 }
872 )]
873 );
874}
875
876#[gpui::test]
877async fn test_tool_hallucination(cx: &mut TestAppContext) {
878 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
879 let fake_model = model.as_fake();
880
881 let mut events = thread
882 .update(cx, |thread, cx| {
883 thread.send(UserMessageId::new(), ["abc"], cx)
884 })
885 .unwrap();
886 cx.run_until_parked();
887 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
888 LanguageModelToolUse {
889 id: "tool_id_1".into(),
890 name: "nonexistent_tool".into(),
891 raw_input: "{}".into(),
892 input: json!({}),
893 is_input_complete: true,
894 thought_signature: None,
895 },
896 ));
897 fake_model.end_last_completion_stream();
898
899 let tool_call = expect_tool_call(&mut events).await;
900 assert_eq!(tool_call.title, "nonexistent_tool");
901 assert_eq!(tool_call.status, acp::ToolCallStatus::Pending);
902 let update = expect_tool_call_update_fields(&mut events).await;
903 assert_eq!(update.fields.status, Some(acp::ToolCallStatus::Failed));
904}
905
906#[gpui::test]
907async fn test_resume_after_tool_use_limit(cx: &mut TestAppContext) {
908 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
909 let fake_model = model.as_fake();
910
911 let events = thread
912 .update(cx, |thread, cx| {
913 thread.add_tool(EchoTool);
914 thread.send(UserMessageId::new(), ["abc"], cx)
915 })
916 .unwrap();
917 cx.run_until_parked();
918 let tool_use = LanguageModelToolUse {
919 id: "tool_id_1".into(),
920 name: EchoTool::name().into(),
921 raw_input: "{}".into(),
922 input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
923 is_input_complete: true,
924 thought_signature: None,
925 };
926 fake_model
927 .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
928 fake_model.end_last_completion_stream();
929
930 cx.run_until_parked();
931 let completion = fake_model.pending_completions().pop().unwrap();
932 let tool_result = LanguageModelToolResult {
933 tool_use_id: "tool_id_1".into(),
934 tool_name: EchoTool::name().into(),
935 is_error: false,
936 content: "def".into(),
937 output: Some("def".into()),
938 };
939 assert_eq!(
940 completion.messages[1..],
941 vec![
942 LanguageModelRequestMessage {
943 role: Role::User,
944 content: vec!["abc".into()],
945 cache: false,
946 reasoning_details: None,
947 },
948 LanguageModelRequestMessage {
949 role: Role::Assistant,
950 content: vec![MessageContent::ToolUse(tool_use.clone())],
951 cache: false,
952 reasoning_details: None,
953 },
954 LanguageModelRequestMessage {
955 role: Role::User,
956 content: vec![MessageContent::ToolResult(tool_result.clone())],
957 cache: true,
958 reasoning_details: None,
959 },
960 ]
961 );
962
963 // Simulate reaching tool use limit.
964 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
965 fake_model.end_last_completion_stream();
966 let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
967 assert!(
968 last_event
969 .unwrap_err()
970 .is::<language_model::ToolUseLimitReachedError>()
971 );
972
973 let events = thread.update(cx, |thread, cx| thread.resume(cx)).unwrap();
974 cx.run_until_parked();
975 let completion = fake_model.pending_completions().pop().unwrap();
976 assert_eq!(
977 completion.messages[1..],
978 vec![
979 LanguageModelRequestMessage {
980 role: Role::User,
981 content: vec!["abc".into()],
982 cache: false,
983 reasoning_details: None,
984 },
985 LanguageModelRequestMessage {
986 role: Role::Assistant,
987 content: vec![MessageContent::ToolUse(tool_use)],
988 cache: false,
989 reasoning_details: None,
990 },
991 LanguageModelRequestMessage {
992 role: Role::User,
993 content: vec![MessageContent::ToolResult(tool_result)],
994 cache: false,
995 reasoning_details: None,
996 },
997 LanguageModelRequestMessage {
998 role: Role::User,
999 content: vec!["Continue where you left off".into()],
1000 cache: true,
1001 reasoning_details: None,
1002 }
1003 ]
1004 );
1005
1006 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text("Done".into()));
1007 fake_model.end_last_completion_stream();
1008 events.collect::<Vec<_>>().await;
1009 thread.read_with(cx, |thread, _cx| {
1010 assert_eq!(
1011 thread.last_message().unwrap().to_markdown(),
1012 indoc! {"
1013 ## Assistant
1014
1015 Done
1016 "}
1017 )
1018 });
1019}
1020
1021#[gpui::test]
1022async fn test_send_after_tool_use_limit(cx: &mut TestAppContext) {
1023 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
1024 let fake_model = model.as_fake();
1025
1026 let events = thread
1027 .update(cx, |thread, cx| {
1028 thread.add_tool(EchoTool);
1029 thread.send(UserMessageId::new(), ["abc"], cx)
1030 })
1031 .unwrap();
1032 cx.run_until_parked();
1033
1034 let tool_use = LanguageModelToolUse {
1035 id: "tool_id_1".into(),
1036 name: EchoTool::name().into(),
1037 raw_input: "{}".into(),
1038 input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
1039 is_input_complete: true,
1040 thought_signature: None,
1041 };
1042 let tool_result = LanguageModelToolResult {
1043 tool_use_id: "tool_id_1".into(),
1044 tool_name: EchoTool::name().into(),
1045 is_error: false,
1046 content: "def".into(),
1047 output: Some("def".into()),
1048 };
1049 fake_model
1050 .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
1051 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
1052 fake_model.end_last_completion_stream();
1053 let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
1054 assert!(
1055 last_event
1056 .unwrap_err()
1057 .is::<language_model::ToolUseLimitReachedError>()
1058 );
1059
1060 thread
1061 .update(cx, |thread, cx| {
1062 thread.send(UserMessageId::new(), vec!["ghi"], cx)
1063 })
1064 .unwrap();
1065 cx.run_until_parked();
1066 let completion = fake_model.pending_completions().pop().unwrap();
1067 assert_eq!(
1068 completion.messages[1..],
1069 vec![
1070 LanguageModelRequestMessage {
1071 role: Role::User,
1072 content: vec!["abc".into()],
1073 cache: false,
1074 reasoning_details: None,
1075 },
1076 LanguageModelRequestMessage {
1077 role: Role::Assistant,
1078 content: vec![MessageContent::ToolUse(tool_use)],
1079 cache: false,
1080 reasoning_details: None,
1081 },
1082 LanguageModelRequestMessage {
1083 role: Role::User,
1084 content: vec![MessageContent::ToolResult(tool_result)],
1085 cache: false,
1086 reasoning_details: None,
1087 },
1088 LanguageModelRequestMessage {
1089 role: Role::User,
1090 content: vec!["ghi".into()],
1091 cache: true,
1092 reasoning_details: None,
1093 }
1094 ]
1095 );
1096}
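
// Hedged helper sketch (illustrative only): both tool-use-limit tests above finish the turn
// by downcasting the stream's final error to `ToolUseLimitReachedError`; this captures that
// assertion in one place.
#[allow(dead_code)]
fn assert_tool_use_limit_reached(last_event: Result<ThreadEvent>) {
    assert!(
        last_event
            .unwrap_err()
            .is::<language_model::ToolUseLimitReachedError>(),
        "expected the turn to end with a tool-use-limit error"
    );
}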
1097
1098async fn expect_tool_call(events: &mut UnboundedReceiver<Result<ThreadEvent>>) -> acp::ToolCall {
1099 let event = events
1100 .next()
1101 .await
        .expect("no tool call event received")
1103 .unwrap();
1104 match event {
1105 ThreadEvent::ToolCall(tool_call) => tool_call,
1106 event => {
1107 panic!("Unexpected event {event:?}");
1108 }
1109 }
1110}
1111
1112async fn expect_tool_call_update_fields(
1113 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1114) -> acp::ToolCallUpdate {
1115 let event = events
1116 .next()
1117 .await
        .expect("no tool call update event received")
1119 .unwrap();
1120 match event {
1121 ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(update)) => update,
1122 event => {
1123 panic!("Unexpected event {event:?}");
1124 }
1125 }
1126}
1127
1128async fn next_tool_call_authorization(
1129 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1130) -> ToolCallAuthorization {
1131 loop {
1132 let event = events
1133 .next()
1134 .await
1135 .expect("no tool call authorization event received")
1136 .unwrap();
1137 if let ThreadEvent::ToolCallAuthorization(tool_call_authorization) = event {
1138 let permission_kinds = tool_call_authorization
1139 .options
1140 .iter()
1141 .map(|o| o.kind)
1142 .collect::<Vec<_>>();
1143 assert_eq!(
1144 permission_kinds,
1145 vec![
1146 acp::PermissionOptionKind::AllowAlways,
1147 acp::PermissionOptionKind::AllowOnce,
1148 acp::PermissionOptionKind::RejectOnce,
1149 ]
1150 );
1151 return tool_call_authorization;
1152 }
1153 }
1154}
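
// Hedged helper sketch (illustrative only; not used by the suite): given the option ordering
// asserted above, a request is approved by sending the `AllowOnce` option id back over the
// authorization's response channel, which is what the tests do with `options[1]`.
#[allow(dead_code)]
fn approve_once(auth: ToolCallAuthorization) {
    let allow_once = auth
        .options
        .iter()
        .find(|option| option.kind == acp::PermissionOptionKind::AllowOnce)
        .expect("an AllowOnce option should be offered");
    auth.response
        .send(allow_once.option_id.clone())
        .unwrap();
}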
1155
1156#[gpui::test]
1157#[cfg_attr(not(feature = "e2e"), ignore)]
1158async fn test_concurrent_tool_calls(cx: &mut TestAppContext) {
1159 let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;
1160
1161 // Test concurrent tool calls with different delay times
1162 let events = thread
1163 .update(cx, |thread, cx| {
1164 thread.add_tool(DelayTool);
1165 thread.send(
1166 UserMessageId::new(),
1167 [
1168 "Call the delay tool twice in the same message.",
1169 "Once with 100ms. Once with 300ms.",
1170 "When both timers are complete, describe the outputs.",
1171 ],
1172 cx,
1173 )
1174 })
1175 .unwrap()
1176 .collect()
1177 .await;
1178
1179 let stop_reasons = stop_events(events);
1180 assert_eq!(stop_reasons, vec![acp::StopReason::EndTurn]);
1181
1182 thread.update(cx, |thread, _cx| {
1183 let last_message = thread.last_message().unwrap();
1184 let agent_message = last_message.as_agent_message().unwrap();
1185 let text = agent_message
1186 .content
1187 .iter()
1188 .filter_map(|content| {
1189 if let AgentMessageContent::Text(text) = content {
1190 Some(text.as_str())
1191 } else {
1192 None
1193 }
1194 })
1195 .collect::<String>();
1196
1197 assert!(text.contains("Ding"));
1198 });
1199}
1200
1201#[gpui::test]
1202async fn test_profiles(cx: &mut TestAppContext) {
1203 let ThreadTest {
1204 model, thread, fs, ..
1205 } = setup(cx, TestModel::Fake).await;
1206 let fake_model = model.as_fake();
1207
1208 thread.update(cx, |thread, _cx| {
1209 thread.add_tool(DelayTool);
1210 thread.add_tool(EchoTool);
1211 thread.add_tool(InfiniteTool);
1212 });
1213
1214 // Override profiles and wait for settings to be loaded.
1215 fs.insert_file(
1216 paths::settings_file(),
1217 json!({
1218 "agent": {
1219 "profiles": {
1220 "test-1": {
1221 "name": "Test Profile 1",
1222 "tools": {
1223 EchoTool::name(): true,
1224 DelayTool::name(): true,
1225 }
1226 },
1227 "test-2": {
1228 "name": "Test Profile 2",
1229 "tools": {
1230 InfiniteTool::name(): true,
1231 }
1232 }
1233 }
1234 }
1235 })
1236 .to_string()
1237 .into_bytes(),
1238 )
1239 .await;
1240 cx.run_until_parked();
1241
1242 // Test that test-1 profile (default) has echo and delay tools
1243 thread
1244 .update(cx, |thread, cx| {
1245 thread.set_profile(AgentProfileId("test-1".into()), cx);
1246 thread.send(UserMessageId::new(), ["test"], cx)
1247 })
1248 .unwrap();
1249 cx.run_until_parked();
1250
1251 let mut pending_completions = fake_model.pending_completions();
1252 assert_eq!(pending_completions.len(), 1);
1253 let completion = pending_completions.pop().unwrap();
1254 let tool_names: Vec<String> = completion
1255 .tools
1256 .iter()
1257 .map(|tool| tool.name.clone())
1258 .collect();
1259 assert_eq!(tool_names, vec![DelayTool::name(), EchoTool::name()]);
1260 fake_model.end_last_completion_stream();
1261
1262 // Switch to test-2 profile, and verify that it has only the infinite tool.
1263 thread
1264 .update(cx, |thread, cx| {
1265 thread.set_profile(AgentProfileId("test-2".into()), cx);
1266 thread.send(UserMessageId::new(), ["test2"], cx)
1267 })
1268 .unwrap();
1269 cx.run_until_parked();
1270 let mut pending_completions = fake_model.pending_completions();
1271 assert_eq!(pending_completions.len(), 1);
1272 let completion = pending_completions.pop().unwrap();
1273 let tool_names: Vec<String> = completion
1274 .tools
1275 .iter()
1276 .map(|tool| tool.name.clone())
1277 .collect();
1278 assert_eq!(tool_names, vec![InfiniteTool::name()]);
1279}
1280
1281#[gpui::test]
1282async fn test_mcp_tools(cx: &mut TestAppContext) {
1283 let ThreadTest {
1284 model,
1285 thread,
1286 context_server_store,
1287 fs,
1288 ..
1289 } = setup(cx, TestModel::Fake).await;
1290 let fake_model = model.as_fake();
1291
1292 // Override profiles and wait for settings to be loaded.
1293 fs.insert_file(
1294 paths::settings_file(),
1295 json!({
1296 "agent": {
1297 "always_allow_tool_actions": true,
1298 "profiles": {
1299 "test": {
1300 "name": "Test Profile",
1301 "enable_all_context_servers": true,
1302 "tools": {
1303 EchoTool::name(): true,
1304 }
1305 },
1306 }
1307 }
1308 })
1309 .to_string()
1310 .into_bytes(),
1311 )
1312 .await;
1313 cx.run_until_parked();
1314 thread.update(cx, |thread, cx| {
1315 thread.set_profile(AgentProfileId("test".into()), cx)
1316 });
1317
1318 let mut mcp_tool_calls = setup_context_server(
1319 "test_server",
1320 vec![context_server::types::Tool {
1321 name: "echo".into(),
1322 description: None,
1323 input_schema: serde_json::to_value(EchoTool::input_schema(
1324 LanguageModelToolSchemaFormat::JsonSchema,
1325 ))
1326 .unwrap(),
1327 output_schema: None,
1328 annotations: None,
1329 }],
1330 &context_server_store,
1331 cx,
1332 );
1333
1334 let events = thread.update(cx, |thread, cx| {
1335 thread.send(UserMessageId::new(), ["Hey"], cx).unwrap()
1336 });
1337 cx.run_until_parked();
1338
1339 // Simulate the model calling the MCP tool.
1340 let completion = fake_model.pending_completions().pop().unwrap();
1341 assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
1342 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1343 LanguageModelToolUse {
1344 id: "tool_1".into(),
1345 name: "echo".into(),
1346 raw_input: json!({"text": "test"}).to_string(),
1347 input: json!({"text": "test"}),
1348 is_input_complete: true,
1349 thought_signature: None,
1350 },
1351 ));
1352 fake_model.end_last_completion_stream();
1353 cx.run_until_parked();
1354
1355 let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
1356 assert_eq!(tool_call_params.name, "echo");
1357 assert_eq!(tool_call_params.arguments, Some(json!({"text": "test"})));
1358 tool_call_response
1359 .send(context_server::types::CallToolResponse {
1360 content: vec![context_server::types::ToolResponseContent::Text {
1361 text: "test".into(),
1362 }],
1363 is_error: None,
1364 meta: None,
1365 structured_content: None,
1366 })
1367 .unwrap();
1368 cx.run_until_parked();
1369
1370 assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
1371 fake_model.send_last_completion_stream_text_chunk("Done!");
1372 fake_model.end_last_completion_stream();
1373 events.collect::<Vec<_>>().await;
1374
1375 // Send again after adding the echo tool, ensuring the name collision is resolved.
1376 let events = thread.update(cx, |thread, cx| {
1377 thread.add_tool(EchoTool);
1378 thread.send(UserMessageId::new(), ["Go"], cx).unwrap()
1379 });
1380 cx.run_until_parked();
1381 let completion = fake_model.pending_completions().pop().unwrap();
1382 assert_eq!(
1383 tool_names_for_completion(&completion),
1384 vec!["echo", "test_server_echo"]
1385 );
1386 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1387 LanguageModelToolUse {
1388 id: "tool_2".into(),
1389 name: "test_server_echo".into(),
1390 raw_input: json!({"text": "mcp"}).to_string(),
1391 input: json!({"text": "mcp"}),
1392 is_input_complete: true,
1393 thought_signature: None,
1394 },
1395 ));
1396 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1397 LanguageModelToolUse {
1398 id: "tool_3".into(),
1399 name: "echo".into(),
1400 raw_input: json!({"text": "native"}).to_string(),
1401 input: json!({"text": "native"}),
1402 is_input_complete: true,
1403 thought_signature: None,
1404 },
1405 ));
1406 fake_model.end_last_completion_stream();
1407 cx.run_until_parked();
1408
1409 let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
1410 assert_eq!(tool_call_params.name, "echo");
1411 assert_eq!(tool_call_params.arguments, Some(json!({"text": "mcp"})));
1412 tool_call_response
1413 .send(context_server::types::CallToolResponse {
1414 content: vec![context_server::types::ToolResponseContent::Text { text: "mcp".into() }],
1415 is_error: None,
1416 meta: None,
1417 structured_content: None,
1418 })
1419 .unwrap();
1420 cx.run_until_parked();
1421
1422 // Ensure the tool results were inserted with the correct names.
1423 let completion = fake_model.pending_completions().pop().unwrap();
1424 assert_eq!(
1425 completion.messages.last().unwrap().content,
1426 vec![
1427 MessageContent::ToolResult(LanguageModelToolResult {
1428 tool_use_id: "tool_3".into(),
1429 tool_name: "echo".into(),
1430 is_error: false,
1431 content: "native".into(),
1432 output: Some("native".into()),
1433 },),
1434 MessageContent::ToolResult(LanguageModelToolResult {
1435 tool_use_id: "tool_2".into(),
1436 tool_name: "test_server_echo".into(),
1437 is_error: false,
1438 content: "mcp".into(),
1439 output: Some("mcp".into()),
1440 },),
1441 ]
1442 );
1443 fake_model.end_last_completion_stream();
1444 events.collect::<Vec<_>>().await;
1445}
1446
1447#[gpui::test]
1448async fn test_mcp_tool_truncation(cx: &mut TestAppContext) {
1449 let ThreadTest {
1450 model,
1451 thread,
1452 context_server_store,
1453 fs,
1454 ..
1455 } = setup(cx, TestModel::Fake).await;
1456 let fake_model = model.as_fake();
1457
1458 // Set up a profile with all tools enabled
1459 fs.insert_file(
1460 paths::settings_file(),
1461 json!({
1462 "agent": {
1463 "profiles": {
1464 "test": {
1465 "name": "Test Profile",
1466 "enable_all_context_servers": true,
1467 "tools": {
1468 EchoTool::name(): true,
1469 DelayTool::name(): true,
1470 WordListTool::name(): true,
1471 ToolRequiringPermission::name(): true,
1472 InfiniteTool::name(): true,
1473 }
1474 },
1475 }
1476 }
1477 })
1478 .to_string()
1479 .into_bytes(),
1480 )
1481 .await;
1482 cx.run_until_parked();
1483
1484 thread.update(cx, |thread, cx| {
1485 thread.set_profile(AgentProfileId("test".into()), cx);
1486 thread.add_tool(EchoTool);
1487 thread.add_tool(DelayTool);
1488 thread.add_tool(WordListTool);
1489 thread.add_tool(ToolRequiringPermission);
1490 thread.add_tool(InfiniteTool);
1491 });
1492
1493 // Set up multiple context servers with some overlapping tool names
1494 let _server1_calls = setup_context_server(
1495 "xxx",
1496 vec![
1497 context_server::types::Tool {
1498 name: "echo".into(), // Conflicts with native EchoTool
1499 description: None,
1500 input_schema: serde_json::to_value(EchoTool::input_schema(
1501 LanguageModelToolSchemaFormat::JsonSchema,
1502 ))
1503 .unwrap(),
1504 output_schema: None,
1505 annotations: None,
1506 },
1507 context_server::types::Tool {
1508 name: "unique_tool_1".into(),
1509 description: None,
1510 input_schema: json!({"type": "object", "properties": {}}),
1511 output_schema: None,
1512 annotations: None,
1513 },
1514 ],
1515 &context_server_store,
1516 cx,
1517 );
1518
1519 let _server2_calls = setup_context_server(
1520 "yyy",
1521 vec![
1522 context_server::types::Tool {
1523 name: "echo".into(), // Also conflicts with native EchoTool
1524 description: None,
1525 input_schema: serde_json::to_value(EchoTool::input_schema(
1526 LanguageModelToolSchemaFormat::JsonSchema,
1527 ))
1528 .unwrap(),
1529 output_schema: None,
1530 annotations: None,
1531 },
1532 context_server::types::Tool {
1533 name: "unique_tool_2".into(),
1534 description: None,
1535 input_schema: json!({"type": "object", "properties": {}}),
1536 output_schema: None,
1537 annotations: None,
1538 },
1539 context_server::types::Tool {
1540 name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
1541 description: None,
1542 input_schema: json!({"type": "object", "properties": {}}),
1543 output_schema: None,
1544 annotations: None,
1545 },
1546 context_server::types::Tool {
1547 name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
1548 description: None,
1549 input_schema: json!({"type": "object", "properties": {}}),
1550 output_schema: None,
1551 annotations: None,
1552 },
1553 ],
1554 &context_server_store,
1555 cx,
1556 );
1557 let _server3_calls = setup_context_server(
1558 "zzz",
1559 vec![
1560 context_server::types::Tool {
1561 name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
1562 description: None,
1563 input_schema: json!({"type": "object", "properties": {}}),
1564 output_schema: None,
1565 annotations: None,
1566 },
1567 context_server::types::Tool {
1568 name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
1569 description: None,
1570 input_schema: json!({"type": "object", "properties": {}}),
1571 output_schema: None,
1572 annotations: None,
1573 },
1574 context_server::types::Tool {
1575 name: "c".repeat(MAX_TOOL_NAME_LENGTH + 1),
1576 description: None,
1577 input_schema: json!({"type": "object", "properties": {}}),
1578 output_schema: None,
1579 annotations: None,
1580 },
1581 ],
1582 &context_server_store,
1583 cx,
1584 );
1585
1586 thread
1587 .update(cx, |thread, cx| {
1588 thread.send(UserMessageId::new(), ["Go"], cx)
1589 })
1590 .unwrap();
1591 cx.run_until_parked();
1592 let completion = fake_model.pending_completions().pop().unwrap();
1593 assert_eq!(
1594 tool_names_for_completion(&completion),
1595 vec![
1596 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
1597 "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
1598 "delay",
1599 "echo",
1600 "infinite",
1601 "tool_requiring_permission",
1602 "unique_tool_1",
1603 "unique_tool_2",
1604 "word_list",
1605 "xxx_echo",
1606 "y_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1607 "yyy_echo",
1608 "z_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1609 ]
1610 );
1611}
1612
1613#[gpui::test]
1614#[cfg_attr(not(feature = "e2e"), ignore)]
1615async fn test_cancellation(cx: &mut TestAppContext) {
1616 let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;
1617
1618 let mut events = thread
1619 .update(cx, |thread, cx| {
1620 thread.add_tool(InfiniteTool);
1621 thread.add_tool(EchoTool);
1622 thread.send(
1623 UserMessageId::new(),
1624 ["Call the echo tool, then call the infinite tool, then explain their output"],
1625 cx,
1626 )
1627 })
1628 .unwrap();
1629
1630 // Wait until both tools are called.
1631 let mut expected_tools = vec!["Echo", "Infinite Tool"];
1632 let mut echo_id = None;
1633 let mut echo_completed = false;
1634 while let Some(event) = events.next().await {
1635 match event.unwrap() {
1636 ThreadEvent::ToolCall(tool_call) => {
1637 assert_eq!(tool_call.title, expected_tools.remove(0));
1638 if tool_call.title == "Echo" {
1639 echo_id = Some(tool_call.tool_call_id);
1640 }
1641 }
1642 ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
1643 acp::ToolCallUpdate {
1644 tool_call_id,
1645 fields:
1646 acp::ToolCallUpdateFields {
1647 status: Some(acp::ToolCallStatus::Completed),
1648 ..
1649 },
1650 ..
1651 },
1652 )) if Some(&tool_call_id) == echo_id.as_ref() => {
1653 echo_completed = true;
1654 }
1655 _ => {}
1656 }
1657
1658 if expected_tools.is_empty() && echo_completed {
1659 break;
1660 }
1661 }
1662
1663 // Cancel the current send and ensure that the event stream is closed, even
1664 // if one of the tools is still running.
1665 thread.update(cx, |thread, cx| thread.cancel(cx)).await;
1666 let events = events.collect::<Vec<_>>().await;
1667 let last_event = events.last();
1668 assert!(
1669 matches!(
1670 last_event,
1671 Some(Ok(ThreadEvent::Stop(acp::StopReason::Cancelled)))
1672 ),
1673 "unexpected event {last_event:?}"
1674 );
1675
1676 // Ensure we can still send a new message after cancellation.
1677 let events = thread
1678 .update(cx, |thread, cx| {
1679 thread.send(
1680 UserMessageId::new(),
1681 ["Testing: reply with 'Hello' then stop."],
1682 cx,
1683 )
1684 })
1685 .unwrap()
1686 .collect::<Vec<_>>()
1687 .await;
1688 thread.update(cx, |thread, _cx| {
1689 let message = thread.last_message().unwrap();
1690 let agent_message = message.as_agent_message().unwrap();
1691 assert_eq!(
1692 agent_message.content,
1693 vec![AgentMessageContent::Text("Hello".to_string())]
1694 );
1695 });
1696 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
1697}
1698
1699#[gpui::test]
1700async fn test_terminal_tool_cancellation_captures_output(cx: &mut TestAppContext) {
1701 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
1702 always_allow_tools(cx);
1703 let fake_model = model.as_fake();
1704
1705 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
1706 let environment = Rc::new(FakeThreadEnvironment {
1707 handle: handle.clone(),
1708 });
1709
1710 let mut events = thread
1711 .update(cx, |thread, cx| {
1712 thread.add_tool(crate::TerminalTool::new(
1713 thread.project().clone(),
1714 environment,
1715 ));
1716 thread.send(UserMessageId::new(), ["run a command"], cx)
1717 })
1718 .unwrap();
1719
1720 cx.run_until_parked();
1721
1722 // Simulate the model calling the terminal tool
1723 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1724 LanguageModelToolUse {
1725 id: "terminal_tool_1".into(),
1726 name: "terminal".into(),
1727 raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
1728 input: json!({"command": "sleep 1000", "cd": "."}),
1729 is_input_complete: true,
1730 thought_signature: None,
1731 },
1732 ));
1733 fake_model.end_last_completion_stream();
1734
1735 // Wait for the terminal tool to start running
1736 wait_for_terminal_tool_started(&mut events, cx).await;
1737
1738 // Cancel the thread while the terminal is running
1739 thread.update(cx, |thread, cx| thread.cancel(cx)).detach();
1740
1741 // Collect remaining events, driving the executor to let cancellation complete
1742 let remaining_events = collect_events_until_stop(&mut events, cx).await;
1743
1744 // Verify the terminal was killed
1745 assert!(
1746 handle.was_killed(),
1747 "expected terminal handle to be killed on cancellation"
1748 );
1749
1750 // Verify we got a cancellation stop event
1751 assert_eq!(
1752 stop_events(remaining_events),
1753 vec![acp::StopReason::Cancelled],
1754 );
1755
1756 // Verify the tool result contains the terminal output, not just "Tool canceled by user"
1757 thread.update(cx, |thread, _cx| {
1758 let message = thread.last_message().unwrap();
1759 let agent_message = message.as_agent_message().unwrap();
1760
1761 let tool_use = agent_message
1762 .content
1763 .iter()
1764 .find_map(|content| match content {
1765 AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
1766 _ => None,
1767 })
1768 .expect("expected tool use in agent message");
1769
1770 let tool_result = agent_message
1771 .tool_results
1772 .get(&tool_use.id)
1773 .expect("expected tool result");
1774
1775 let result_text = match &tool_result.content {
1776 language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
1777 _ => panic!("expected text content in tool result"),
1778 };
1779
1780 // "partial output" comes from FakeTerminalHandle's output field
1781 assert!(
1782 result_text.contains("partial output"),
1783 "expected tool result to contain terminal output, got: {result_text}"
1784 );
1785 // Match the actual format from process_content in terminal_tool.rs
1786 assert!(
1787 result_text.contains("The user stopped this command"),
1788 "expected tool result to indicate user stopped, got: {result_text}"
1789 );
1790 });
1791
1792 // Verify we can send a new message after cancellation
1793 verify_thread_recovery(&thread, &fake_model, cx).await;
1794}
1795
1796#[gpui::test]
1797async fn test_cancellation_aware_tool_responds_to_cancellation(cx: &mut TestAppContext) {
1798 // This test verifies that tools which properly handle cancellation via
1799 // `event_stream.cancelled_by_user()` (like edit_file_tool) respond promptly
1800 // to cancellation and report that they were cancelled.
1801 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
1802 always_allow_tools(cx);
1803 let fake_model = model.as_fake();
1804
1805 let (tool, was_cancelled) = CancellationAwareTool::new();
1806
1807 let mut events = thread
1808 .update(cx, |thread, cx| {
1809 thread.add_tool(tool);
1810 thread.send(
1811 UserMessageId::new(),
1812 ["call the cancellation aware tool"],
1813 cx,
1814 )
1815 })
1816 .unwrap();
1817
1818 cx.run_until_parked();
1819
1820 // Simulate the model calling the cancellation-aware tool
1821 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1822 LanguageModelToolUse {
1823 id: "cancellation_aware_1".into(),
1824 name: "cancellation_aware".into(),
1825 raw_input: r#"{}"#.into(),
1826 input: json!({}),
1827 is_input_complete: true,
1828 thought_signature: None,
1829 },
1830 ));
1831 fake_model.end_last_completion_stream();
1832
1833 cx.run_until_parked();
1834
1835 // Wait for the tool call to be reported
1836 let mut tool_started = false;
1837 let deadline = cx.executor().num_cpus() * 100;
1838 for _ in 0..deadline {
1839 cx.run_until_parked();
1840
1841 while let Some(Some(event)) = events.next().now_or_never() {
1842 if let Ok(ThreadEvent::ToolCall(tool_call)) = &event {
1843 if tool_call.title == "Cancellation Aware Tool" {
1844 tool_started = true;
1845 break;
1846 }
1847 }
1848 }
1849
1850 if tool_started {
1851 break;
1852 }
1853
1854 cx.background_executor
1855 .timer(Duration::from_millis(10))
1856 .await;
1857 }
1858 assert!(tool_started, "expected cancellation aware tool to start");
1859
1860 // Cancel the thread and wait for it to complete
1861 let cancel_task = thread.update(cx, |thread, cx| thread.cancel(cx));
1862
1863 // The cancel task should complete promptly because the tool handles cancellation
1864 let timeout = cx.background_executor.timer(Duration::from_secs(5));
1865 futures::select! {
1866 _ = cancel_task.fuse() => {}
1867 _ = timeout.fuse() => {
1868 panic!("cancel task timed out - tool did not respond to cancellation");
1869 }
1870 }
1871
1872 // Verify the tool detected cancellation via its flag
1873 assert!(
1874 was_cancelled.load(std::sync::atomic::Ordering::SeqCst),
1875 "tool should have detected cancellation via event_stream.cancelled_by_user()"
1876 );
1877
1878 // Collect remaining events
1879 let remaining_events = collect_events_until_stop(&mut events, cx).await;
1880
1881 // Verify we got a cancellation stop event
1882 assert_eq!(
1883 stop_events(remaining_events),
1884 vec![acp::StopReason::Cancelled],
1885 );
1886
1887 // Verify we can send a new message after cancellation
1888 verify_thread_recovery(&thread, &fake_model, cx).await;
1889}
1890
1891/// Helper to verify thread can recover after cancellation by sending a simple message.
1892async fn verify_thread_recovery(
1893 thread: &Entity<Thread>,
1894 fake_model: &FakeLanguageModel,
1895 cx: &mut TestAppContext,
1896) {
1897 let events = thread
1898 .update(cx, |thread, cx| {
1899 thread.send(
1900 UserMessageId::new(),
1901 ["Testing: reply with 'Hello' then stop."],
1902 cx,
1903 )
1904 })
1905 .unwrap();
1906 cx.run_until_parked();
1907 fake_model.send_last_completion_stream_text_chunk("Hello");
1908 fake_model
1909 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
1910 fake_model.end_last_completion_stream();
1911
1912 let events = events.collect::<Vec<_>>().await;
1913 thread.update(cx, |thread, _cx| {
1914 let message = thread.last_message().unwrap();
1915 let agent_message = message.as_agent_message().unwrap();
1916 assert_eq!(
1917 agent_message.content,
1918 vec![AgentMessageContent::Text("Hello".to_string())]
1919 );
1920 });
1921 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
1922}
1923
1924/// Waits for a terminal tool to start by watching for a ToolCallUpdate with terminal content.
1925async fn wait_for_terminal_tool_started(
1926 events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
1927 cx: &mut TestAppContext,
1928) {
1929 let deadline = cx.executor().num_cpus() * 100; // Scale with available parallelism
1930 for _ in 0..deadline {
1931 cx.run_until_parked();
1932
1933 while let Some(Some(event)) = events.next().now_or_never() {
1934 if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
1935 update,
1936 ))) = &event
1937 {
1938 if update.fields.content.as_ref().is_some_and(|content| {
1939 content
1940 .iter()
1941 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
1942 }) {
1943 return;
1944 }
1945 }
1946 }
1947
1948 cx.background_executor
1949 .timer(Duration::from_millis(10))
1950 .await;
1951 }
1952 panic!("terminal tool did not start within the expected time");
1953}
1954
1955/// Collects events until a Stop event is received, driving the executor to completion.
1956async fn collect_events_until_stop(
1957 events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
1958 cx: &mut TestAppContext,
1959) -> Vec<Result<ThreadEvent>> {
1960 let mut collected = Vec::new();
1961 let deadline = cx.executor().num_cpus() * 200;
1962
1963 for _ in 0..deadline {
1964 cx.executor().advance_clock(Duration::from_millis(10));
1965 cx.run_until_parked();
1966
1967 while let Some(Some(event)) = events.next().now_or_never() {
1968 let is_stop = matches!(&event, Ok(ThreadEvent::Stop(_)));
1969 collected.push(event);
1970 if is_stop {
1971 return collected;
1972 }
1973 }
1974 }
1975 panic!(
1976 "did not receive Stop event within the expected time; collected {} events",
1977 collected.len()
1978 );
1979}
1980
1981#[gpui::test]
1982async fn test_truncate_while_terminal_tool_running(cx: &mut TestAppContext) {
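    // Tests that truncating the thread while a terminal tool is running kills the terminal,
    // empties the thread, and still allows a new message to be sent afterwards.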
1983 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
1984 always_allow_tools(cx);
1985 let fake_model = model.as_fake();
1986
1987 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
1988 let environment = Rc::new(FakeThreadEnvironment {
1989 handle: handle.clone(),
1990 });
1991
1992 let message_id = UserMessageId::new();
1993 let mut events = thread
1994 .update(cx, |thread, cx| {
1995 thread.add_tool(crate::TerminalTool::new(
1996 thread.project().clone(),
1997 environment,
1998 ));
1999 thread.send(message_id.clone(), ["run a command"], cx)
2000 })
2001 .unwrap();
2002
2003 cx.run_until_parked();
2004
2005 // Simulate the model calling the terminal tool
2006 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2007 LanguageModelToolUse {
2008 id: "terminal_tool_1".into(),
2009 name: "terminal".into(),
2010 raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
2011 input: json!({"command": "sleep 1000", "cd": "."}),
2012 is_input_complete: true,
2013 thought_signature: None,
2014 },
2015 ));
2016 fake_model.end_last_completion_stream();
2017
2018 // Wait for the terminal tool to start running
2019 wait_for_terminal_tool_started(&mut events, cx).await;
2020
2021 // Truncate the thread while the terminal is running
2022 thread
2023 .update(cx, |thread, cx| thread.truncate(message_id, cx))
2024 .unwrap();
2025
2026 // Drive the executor to let cancellation complete
2027 let _ = collect_events_until_stop(&mut events, cx).await;
2028
2029 // Verify the terminal was killed
2030 assert!(
2031 handle.was_killed(),
2032 "expected terminal handle to be killed on truncate"
2033 );
2034
2035 // Verify the thread is empty after truncation
2036 thread.update(cx, |thread, _cx| {
2037 assert_eq!(
2038 thread.to_markdown(),
2039 "",
2040 "expected thread to be empty after truncating the only message"
2041 );
2042 });
2043
2044 // Verify we can send a new message after truncation
2045 verify_thread_recovery(&thread, &fake_model, cx).await;
2046}
2047
2048#[gpui::test]
2049async fn test_cancel_multiple_concurrent_terminal_tools(cx: &mut TestAppContext) {
2050 // Tests that cancellation properly kills all running terminal tools when multiple are active.
2051 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2052 always_allow_tools(cx);
2053 let fake_model = model.as_fake();
2054
2055 let environment = Rc::new(MultiTerminalEnvironment::new());
2056
2057 let mut events = thread
2058 .update(cx, |thread, cx| {
2059 thread.add_tool(crate::TerminalTool::new(
2060 thread.project().clone(),
2061 environment.clone(),
2062 ));
2063 thread.send(UserMessageId::new(), ["run multiple commands"], cx)
2064 })
2065 .unwrap();
2066
2067 cx.run_until_parked();
2068
2069 // Simulate the model calling two terminal tools
2070 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2071 LanguageModelToolUse {
2072 id: "terminal_tool_1".into(),
2073 name: "terminal".into(),
2074 raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
2075 input: json!({"command": "sleep 1000", "cd": "."}),
2076 is_input_complete: true,
2077 thought_signature: None,
2078 },
2079 ));
2080 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2081 LanguageModelToolUse {
2082 id: "terminal_tool_2".into(),
2083 name: "terminal".into(),
2084 raw_input: r#"{"command": "sleep 2000", "cd": "."}"#.into(),
2085 input: json!({"command": "sleep 2000", "cd": "."}),
2086 is_input_complete: true,
2087 thought_signature: None,
2088 },
2089 ));
2090 fake_model.end_last_completion_stream();
2091
2092 // Wait for both terminal tools to start by counting terminal content updates
2093 let mut terminals_started = 0;
2094 let deadline = cx.executor().num_cpus() * 100;
2095 for _ in 0..deadline {
2096 cx.run_until_parked();
2097
2098 while let Some(Some(event)) = events.next().now_or_never() {
2099 if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
2100 update,
2101 ))) = &event
2102 {
2103 if update.fields.content.as_ref().is_some_and(|content| {
2104 content
2105 .iter()
2106 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
2107 }) {
2108 terminals_started += 1;
2109 if terminals_started >= 2 {
2110 break;
2111 }
2112 }
2113 }
2114 }
2115 if terminals_started >= 2 {
2116 break;
2117 }
2118
2119 cx.background_executor
2120 .timer(Duration::from_millis(10))
2121 .await;
2122 }
2123 assert!(
2124 terminals_started >= 2,
2125 "expected 2 terminal tools to start, got {terminals_started}"
2126 );
2127
2128 // Cancel the thread while both terminals are running
2129 thread.update(cx, |thread, cx| thread.cancel(cx)).detach();
2130
2131 // Collect remaining events
2132 let remaining_events = collect_events_until_stop(&mut events, cx).await;
2133
2134 // Verify both terminal handles were killed
2135 let handles = environment.handles();
2136 assert_eq!(
2137 handles.len(),
2138 2,
2139 "expected 2 terminal handles to be created"
2140 );
2141 assert!(
2142 handles[0].was_killed(),
2143 "expected first terminal handle to be killed on cancellation"
2144 );
2145 assert!(
2146 handles[1].was_killed(),
2147 "expected second terminal handle to be killed on cancellation"
2148 );
2149
2150 // Verify we got a cancellation stop event
2151 assert_eq!(
2152 stop_events(remaining_events),
2153 vec![acp::StopReason::Cancelled],
2154 );
2155}
2156
2157#[gpui::test]
2158async fn test_terminal_tool_stopped_via_terminal_card_button(cx: &mut TestAppContext) {
    // Tests that clicking the stop button on the terminal card (as opposed to the main
    // cancel button) reports that the user stopped the command via the was_stopped_by_user path.
2161 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2162 always_allow_tools(cx);
2163 let fake_model = model.as_fake();
2164
2165 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
2166 let environment = Rc::new(FakeThreadEnvironment {
2167 handle: handle.clone(),
2168 });
2169
2170 let mut events = thread
2171 .update(cx, |thread, cx| {
2172 thread.add_tool(crate::TerminalTool::new(
2173 thread.project().clone(),
2174 environment,
2175 ));
2176 thread.send(UserMessageId::new(), ["run a command"], cx)
2177 })
2178 .unwrap();
2179
2180 cx.run_until_parked();
2181
2182 // Simulate the model calling the terminal tool
2183 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2184 LanguageModelToolUse {
2185 id: "terminal_tool_1".into(),
2186 name: "terminal".into(),
2187 raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
2188 input: json!({"command": "sleep 1000", "cd": "."}),
2189 is_input_complete: true,
2190 thought_signature: None,
2191 },
2192 ));
2193 fake_model.end_last_completion_stream();
2194
2195 // Wait for the terminal tool to start running
2196 wait_for_terminal_tool_started(&mut events, cx).await;
2197
2198 // Simulate user clicking stop on the terminal card itself.
2199 // This sets the flag and signals exit (simulating what the real UI would do).
2200 handle.set_stopped_by_user(true);
2201 handle.killed.store(true, Ordering::SeqCst);
2202 handle.signal_exit();
2203
2204 // Wait for the tool to complete
2205 cx.run_until_parked();
2206
    // The thread continues after the tool completes; simulate the model ending its turn
2208 fake_model
2209 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2210 fake_model.end_last_completion_stream();
2211
2212 // Collect remaining events
2213 let remaining_events = collect_events_until_stop(&mut events, cx).await;
2214
2215 // Verify we got an EndTurn (not Cancelled, since we didn't cancel the thread)
2216 assert_eq!(
2217 stop_events(remaining_events),
2218 vec![acp::StopReason::EndTurn],
2219 );
2220
2221 // Verify the tool result indicates user stopped
2222 thread.update(cx, |thread, _cx| {
2223 let message = thread.last_message().unwrap();
2224 let agent_message = message.as_agent_message().unwrap();
2225
2226 let tool_use = agent_message
2227 .content
2228 .iter()
2229 .find_map(|content| match content {
2230 AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
2231 _ => None,
2232 })
2233 .expect("expected tool use in agent message");
2234
2235 let tool_result = agent_message
2236 .tool_results
2237 .get(&tool_use.id)
2238 .expect("expected tool result");
2239
2240 let result_text = match &tool_result.content {
2241 language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
2242 _ => panic!("expected text content in tool result"),
2243 };
2244
2245 assert!(
2246 result_text.contains("The user stopped this command"),
2247 "expected tool result to indicate user stopped, got: {result_text}"
2248 );
2249 });
2250}
2251
2252#[gpui::test]
2253async fn test_terminal_tool_timeout_expires(cx: &mut TestAppContext) {
2254 // Tests that when a timeout is configured and expires, the tool result indicates timeout.
2255 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2256 always_allow_tools(cx);
2257 let fake_model = model.as_fake();
2258
2259 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
2260 let environment = Rc::new(FakeThreadEnvironment {
2261 handle: handle.clone(),
2262 });
2263
2264 let mut events = thread
2265 .update(cx, |thread, cx| {
2266 thread.add_tool(crate::TerminalTool::new(
2267 thread.project().clone(),
2268 environment,
2269 ));
2270 thread.send(UserMessageId::new(), ["run a command with timeout"], cx)
2271 })
2272 .unwrap();
2273
2274 cx.run_until_parked();
2275
2276 // Simulate the model calling the terminal tool with a short timeout
2277 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2278 LanguageModelToolUse {
2279 id: "terminal_tool_1".into(),
2280 name: "terminal".into(),
2281 raw_input: r#"{"command": "sleep 1000", "cd": ".", "timeout_ms": 100}"#.into(),
2282 input: json!({"command": "sleep 1000", "cd": ".", "timeout_ms": 100}),
2283 is_input_complete: true,
2284 thought_signature: None,
2285 },
2286 ));
2287 fake_model.end_last_completion_stream();
2288
2289 // Wait for the terminal tool to start running
2290 wait_for_terminal_tool_started(&mut events, cx).await;
2291
2292 // Advance clock past the timeout
2293 cx.executor().advance_clock(Duration::from_millis(200));
2294 cx.run_until_parked();
2295
    // The thread continues after the tool completes; simulate the model ending its turn
2297 fake_model
2298 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2299 fake_model.end_last_completion_stream();
2300
2301 // Collect remaining events
2302 let remaining_events = collect_events_until_stop(&mut events, cx).await;
2303
2304 // Verify the terminal was killed due to timeout
2305 assert!(
2306 handle.was_killed(),
2307 "expected terminal handle to be killed on timeout"
2308 );
2309
    // Verify we got an EndTurn (the tool completed, just with a timeout)
2311 assert_eq!(
2312 stop_events(remaining_events),
2313 vec![acp::StopReason::EndTurn],
2314 );
2315
2316 // Verify the tool result indicates timeout, not user stopped
2317 thread.update(cx, |thread, _cx| {
2318 let message = thread.last_message().unwrap();
2319 let agent_message = message.as_agent_message().unwrap();
2320
2321 let tool_use = agent_message
2322 .content
2323 .iter()
2324 .find_map(|content| match content {
2325 AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
2326 _ => None,
2327 })
2328 .expect("expected tool use in agent message");
2329
2330 let tool_result = agent_message
2331 .tool_results
2332 .get(&tool_use.id)
2333 .expect("expected tool result");
2334
2335 let result_text = match &tool_result.content {
2336 language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
2337 _ => panic!("expected text content in tool result"),
2338 };
2339
2340 assert!(
2341 result_text.contains("timed out"),
2342 "expected tool result to indicate timeout, got: {result_text}"
2343 );
2344 assert!(
2345 !result_text.contains("The user stopped"),
2346 "tool result should not mention user stopped when it timed out, got: {result_text}"
2347 );
2348 });
2349}
2350
2351#[gpui::test]
2352async fn test_in_progress_send_canceled_by_next_send(cx: &mut TestAppContext) {
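    // Tests that sending a new message while a previous turn is still streaming cancels the in-progress turn.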
2353 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2354 let fake_model = model.as_fake();
2355
2356 let events_1 = thread
2357 .update(cx, |thread, cx| {
2358 thread.send(UserMessageId::new(), ["Hello 1"], cx)
2359 })
2360 .unwrap();
2361 cx.run_until_parked();
2362 fake_model.send_last_completion_stream_text_chunk("Hey 1!");
2363 cx.run_until_parked();
2364
2365 let events_2 = thread
2366 .update(cx, |thread, cx| {
2367 thread.send(UserMessageId::new(), ["Hello 2"], cx)
2368 })
2369 .unwrap();
2370 cx.run_until_parked();
2371 fake_model.send_last_completion_stream_text_chunk("Hey 2!");
2372 fake_model
2373 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2374 fake_model.end_last_completion_stream();
2375
2376 let events_1 = events_1.collect::<Vec<_>>().await;
2377 assert_eq!(stop_events(events_1), vec![acp::StopReason::Cancelled]);
2378 let events_2 = events_2.collect::<Vec<_>>().await;
2379 assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
2380}
2381
2382#[gpui::test]
2383async fn test_subsequent_successful_sends_dont_cancel(cx: &mut TestAppContext) {
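    // Tests that a send issued after the previous turn has finished does not cancel it; both turns end with EndTurn.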
2384 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2385 let fake_model = model.as_fake();
2386
2387 let events_1 = thread
2388 .update(cx, |thread, cx| {
2389 thread.send(UserMessageId::new(), ["Hello 1"], cx)
2390 })
2391 .unwrap();
2392 cx.run_until_parked();
2393 fake_model.send_last_completion_stream_text_chunk("Hey 1!");
2394 fake_model
2395 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2396 fake_model.end_last_completion_stream();
2397 let events_1 = events_1.collect::<Vec<_>>().await;
2398
2399 let events_2 = thread
2400 .update(cx, |thread, cx| {
2401 thread.send(UserMessageId::new(), ["Hello 2"], cx)
2402 })
2403 .unwrap();
2404 cx.run_until_parked();
2405 fake_model.send_last_completion_stream_text_chunk("Hey 2!");
2406 fake_model
2407 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2408 fake_model.end_last_completion_stream();
2409 let events_2 = events_2.collect::<Vec<_>>().await;
2410
2411 assert_eq!(stop_events(events_1), vec![acp::StopReason::EndTurn]);
2412 assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
2413}
2414
2415#[gpui::test]
2416async fn test_refusal(cx: &mut TestAppContext) {
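    // Tests that a Refusal stop reason is surfaced as a stop event and the refused exchange is removed from the thread.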
2417 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2418 let fake_model = model.as_fake();
2419
2420 let events = thread
2421 .update(cx, |thread, cx| {
2422 thread.send(UserMessageId::new(), ["Hello"], cx)
2423 })
2424 .unwrap();
2425 cx.run_until_parked();
2426 thread.read_with(cx, |thread, _| {
2427 assert_eq!(
2428 thread.to_markdown(),
2429 indoc! {"
2430 ## User
2431
2432 Hello
2433 "}
2434 );
2435 });
2436
2437 fake_model.send_last_completion_stream_text_chunk("Hey!");
2438 cx.run_until_parked();
2439 thread.read_with(cx, |thread, _| {
2440 assert_eq!(
2441 thread.to_markdown(),
2442 indoc! {"
2443 ## User
2444
2445 Hello
2446
2447 ## Assistant
2448
2449 Hey!
2450 "}
2451 );
2452 });
2453
    // If the model refuses to continue, the thread should remove the last user message and everything after it.
2455 fake_model
2456 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::Refusal));
2457 let events = events.collect::<Vec<_>>().await;
2458 assert_eq!(stop_events(events), vec![acp::StopReason::Refusal]);
2459 thread.read_with(cx, |thread, _| {
2460 assert_eq!(thread.to_markdown(), "");
2461 });
2462}
2463
2464#[gpui::test]
2465async fn test_truncate_first_message(cx: &mut TestAppContext) {
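    // Tests that truncating at the first message clears the thread and its token usage, and that a new message can be sent afterwards.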
2466 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2467 let fake_model = model.as_fake();
2468
2469 let message_id = UserMessageId::new();
2470 thread
2471 .update(cx, |thread, cx| {
2472 thread.send(message_id.clone(), ["Hello"], cx)
2473 })
2474 .unwrap();
2475 cx.run_until_parked();
2476 thread.read_with(cx, |thread, _| {
2477 assert_eq!(
2478 thread.to_markdown(),
2479 indoc! {"
2480 ## User
2481
2482 Hello
2483 "}
2484 );
2485 assert_eq!(thread.latest_token_usage(), None);
2486 });
2487
2488 fake_model.send_last_completion_stream_text_chunk("Hey!");
2489 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
2490 language_model::TokenUsage {
2491 input_tokens: 32_000,
2492 output_tokens: 16_000,
2493 cache_creation_input_tokens: 0,
2494 cache_read_input_tokens: 0,
2495 },
2496 ));
2497 cx.run_until_parked();
2498 thread.read_with(cx, |thread, _| {
2499 assert_eq!(
2500 thread.to_markdown(),
2501 indoc! {"
2502 ## User
2503
2504 Hello
2505
2506 ## Assistant
2507
2508 Hey!
2509 "}
2510 );
2511 assert_eq!(
2512 thread.latest_token_usage(),
2513 Some(acp_thread::TokenUsage {
2514 used_tokens: 32_000 + 16_000,
2515 max_tokens: 1_000_000,
2516 output_tokens: 16_000,
2517 })
2518 );
2519 });
2520
2521 thread
2522 .update(cx, |thread, cx| thread.truncate(message_id, cx))
2523 .unwrap();
2524 cx.run_until_parked();
2525 thread.read_with(cx, |thread, _| {
2526 assert_eq!(thread.to_markdown(), "");
2527 assert_eq!(thread.latest_token_usage(), None);
2528 });
2529
2530 // Ensure we can still send a new message after truncation.
2531 thread
2532 .update(cx, |thread, cx| {
2533 thread.send(UserMessageId::new(), ["Hi"], cx)
2534 })
2535 .unwrap();
2536 thread.update(cx, |thread, _cx| {
2537 assert_eq!(
2538 thread.to_markdown(),
2539 indoc! {"
2540 ## User
2541
2542 Hi
2543 "}
2544 );
2545 });
2546 cx.run_until_parked();
2547 fake_model.send_last_completion_stream_text_chunk("Ahoy!");
2548 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
2549 language_model::TokenUsage {
2550 input_tokens: 40_000,
2551 output_tokens: 20_000,
2552 cache_creation_input_tokens: 0,
2553 cache_read_input_tokens: 0,
2554 },
2555 ));
2556 cx.run_until_parked();
2557 thread.read_with(cx, |thread, _| {
2558 assert_eq!(
2559 thread.to_markdown(),
2560 indoc! {"
2561 ## User
2562
2563 Hi
2564
2565 ## Assistant
2566
2567 Ahoy!
2568 "}
2569 );
2570
2571 assert_eq!(
2572 thread.latest_token_usage(),
2573 Some(acp_thread::TokenUsage {
2574 used_tokens: 40_000 + 20_000,
2575 max_tokens: 1_000_000,
2576 output_tokens: 20_000,
2577 })
2578 );
2579 });
2580}
2581
2582#[gpui::test]
2583async fn test_truncate_second_message(cx: &mut TestAppContext) {
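    // Tests that truncating at the second message restores the thread and token usage to the state after the first exchange.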
2584 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2585 let fake_model = model.as_fake();
2586
2587 thread
2588 .update(cx, |thread, cx| {
2589 thread.send(UserMessageId::new(), ["Message 1"], cx)
2590 })
2591 .unwrap();
2592 cx.run_until_parked();
2593 fake_model.send_last_completion_stream_text_chunk("Message 1 response");
2594 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
2595 language_model::TokenUsage {
2596 input_tokens: 32_000,
2597 output_tokens: 16_000,
2598 cache_creation_input_tokens: 0,
2599 cache_read_input_tokens: 0,
2600 },
2601 ));
2602 fake_model.end_last_completion_stream();
2603 cx.run_until_parked();
2604
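    // Asserts that only the first exchange and its token usage remain.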
2605 let assert_first_message_state = |cx: &mut TestAppContext| {
2606 thread.clone().read_with(cx, |thread, _| {
2607 assert_eq!(
2608 thread.to_markdown(),
2609 indoc! {"
2610 ## User
2611
2612 Message 1
2613
2614 ## Assistant
2615
2616 Message 1 response
2617 "}
2618 );
2619
2620 assert_eq!(
2621 thread.latest_token_usage(),
2622 Some(acp_thread::TokenUsage {
2623 used_tokens: 32_000 + 16_000,
2624 max_tokens: 1_000_000,
2625 output_tokens: 16_000,
2626 })
2627 );
2628 });
2629 };
2630
2631 assert_first_message_state(cx);
2632
2633 let second_message_id = UserMessageId::new();
2634 thread
2635 .update(cx, |thread, cx| {
2636 thread.send(second_message_id.clone(), ["Message 2"], cx)
2637 })
2638 .unwrap();
2639 cx.run_until_parked();
2640
2641 fake_model.send_last_completion_stream_text_chunk("Message 2 response");
2642 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
2643 language_model::TokenUsage {
2644 input_tokens: 40_000,
2645 output_tokens: 20_000,
2646 cache_creation_input_tokens: 0,
2647 cache_read_input_tokens: 0,
2648 },
2649 ));
2650 fake_model.end_last_completion_stream();
2651 cx.run_until_parked();
2652
2653 thread.read_with(cx, |thread, _| {
2654 assert_eq!(
2655 thread.to_markdown(),
2656 indoc! {"
2657 ## User
2658
2659 Message 1
2660
2661 ## Assistant
2662
2663 Message 1 response
2664
2665 ## User
2666
2667 Message 2
2668
2669 ## Assistant
2670
2671 Message 2 response
2672 "}
2673 );
2674
2675 assert_eq!(
2676 thread.latest_token_usage(),
2677 Some(acp_thread::TokenUsage {
2678 used_tokens: 40_000 + 20_000,
2679 max_tokens: 1_000_000,
2680 output_tokens: 20_000,
2681 })
2682 );
2683 });
2684
2685 thread
2686 .update(cx, |thread, cx| thread.truncate(second_message_id, cx))
2687 .unwrap();
2688 cx.run_until_parked();
2689
2690 assert_first_message_state(cx);
2691}
2692
2693#[gpui::test]
2694async fn test_title_generation(cx: &mut TestAppContext) {
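    // Tests that the summarization model generates a title after the first exchange and is not invoked again for later messages.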
2695 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2696 let fake_model = model.as_fake();
2697
2698 let summary_model = Arc::new(FakeLanguageModel::default());
2699 thread.update(cx, |thread, cx| {
2700 thread.set_summarization_model(Some(summary_model.clone()), cx)
2701 });
2702
2703 let send = thread
2704 .update(cx, |thread, cx| {
2705 thread.send(UserMessageId::new(), ["Hello"], cx)
2706 })
2707 .unwrap();
2708 cx.run_until_parked();
2709
2710 fake_model.send_last_completion_stream_text_chunk("Hey!");
2711 fake_model.end_last_completion_stream();
2712 cx.run_until_parked();
2713 thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "New Thread"));
2714
    // The summary model should have been invoked to generate a title; only the first line of its output is used.
2716 summary_model.send_last_completion_stream_text_chunk("Hello ");
2717 summary_model.send_last_completion_stream_text_chunk("world\nG");
2718 summary_model.send_last_completion_stream_text_chunk("oodnight Moon");
2719 summary_model.end_last_completion_stream();
2720 send.collect::<Vec<_>>().await;
2721 cx.run_until_parked();
2722 thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));
2723
2724 // Send another message, ensuring no title is generated this time.
2725 let send = thread
2726 .update(cx, |thread, cx| {
2727 thread.send(UserMessageId::new(), ["Hello again"], cx)
2728 })
2729 .unwrap();
2730 cx.run_until_parked();
2731 fake_model.send_last_completion_stream_text_chunk("Hey again!");
2732 fake_model.end_last_completion_stream();
2733 cx.run_until_parked();
2734 assert_eq!(summary_model.pending_completions(), Vec::new());
2735 send.collect::<Vec<_>>().await;
2736 thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));
2737}
2738
2739#[gpui::test]
2740async fn test_building_request_with_pending_tools(cx: &mut TestAppContext) {
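    // Tests that tool calls still awaiting permission are excluded when building the next completion request.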
2741 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2742 let fake_model = model.as_fake();
2743
2744 let _events = thread
2745 .update(cx, |thread, cx| {
2746 thread.add_tool(ToolRequiringPermission);
2747 thread.add_tool(EchoTool);
2748 thread.send(UserMessageId::new(), ["Hey!"], cx)
2749 })
2750 .unwrap();
2751 cx.run_until_parked();
2752
2753 let permission_tool_use = LanguageModelToolUse {
2754 id: "tool_id_1".into(),
2755 name: ToolRequiringPermission::name().into(),
2756 raw_input: "{}".into(),
2757 input: json!({}),
2758 is_input_complete: true,
2759 thought_signature: None,
2760 };
2761 let echo_tool_use = LanguageModelToolUse {
2762 id: "tool_id_2".into(),
2763 name: EchoTool::name().into(),
2764 raw_input: json!({"text": "test"}).to_string(),
2765 input: json!({"text": "test"}),
2766 is_input_complete: true,
2767 thought_signature: None,
2768 };
2769 fake_model.send_last_completion_stream_text_chunk("Hi!");
2770 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2771 permission_tool_use,
2772 ));
2773 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2774 echo_tool_use.clone(),
2775 ));
2776 fake_model.end_last_completion_stream();
2777 cx.run_until_parked();
2778
2779 // Ensure pending tools are skipped when building a request.
2780 let request = thread
2781 .read_with(cx, |thread, cx| {
2782 thread.build_completion_request(CompletionIntent::EditFile, cx)
2783 })
2784 .unwrap();
2785 assert_eq!(
2786 request.messages[1..],
2787 vec![
2788 LanguageModelRequestMessage {
2789 role: Role::User,
2790 content: vec!["Hey!".into()],
2791 cache: true,
2792 reasoning_details: None,
2793 },
2794 LanguageModelRequestMessage {
2795 role: Role::Assistant,
2796 content: vec![
2797 MessageContent::Text("Hi!".into()),
2798 MessageContent::ToolUse(echo_tool_use.clone())
2799 ],
2800 cache: false,
2801 reasoning_details: None,
2802 },
2803 LanguageModelRequestMessage {
2804 role: Role::User,
2805 content: vec![MessageContent::ToolResult(LanguageModelToolResult {
2806 tool_use_id: echo_tool_use.id.clone(),
2807 tool_name: echo_tool_use.name,
2808 is_error: false,
2809 content: "test".into(),
2810 output: Some("test".into())
2811 })],
2812 cache: false,
2813 reasoning_details: None,
2814 },
2815 ],
2816 );
2817}
2818
2819#[gpui::test]
2820async fn test_agent_connection(cx: &mut TestAppContext) {
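    // Exercises NativeAgentConnection end to end: creating a thread, listing and selecting models,
    // prompting, cancelling, and session cleanup when the ACP thread is dropped.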
2821 cx.update(settings::init);
2822 let templates = Templates::new();
2823
2824 // Initialize language model system with test provider
2825 cx.update(|cx| {
2826 gpui_tokio::init(cx);
2827
2828 let http_client = FakeHttpClient::with_404_response();
2829 let clock = Arc::new(clock::FakeSystemClock::new());
2830 let client = Client::new(clock, http_client, cx);
2831 let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
2832 language_model::init(client.clone(), cx);
2833 language_models::init(user_store, client.clone(), cx);
2834 LanguageModelRegistry::test(cx);
2835 });
2836 cx.executor().forbid_parking();
2837
2838 // Create a project for new_thread
2839 let fake_fs = cx.update(|cx| fs::FakeFs::new(cx.background_executor().clone()));
2840 fake_fs.insert_tree(path!("/test"), json!({})).await;
2841 let project = Project::test(fake_fs.clone(), [Path::new("/test")], cx).await;
2842 let cwd = Path::new("/test");
2843 let thread_store = cx.new(|cx| ThreadStore::new(cx));
2844
2845 // Create agent and connection
2846 let agent = NativeAgent::new(
2847 project.clone(),
2848 thread_store,
2849 templates.clone(),
2850 None,
2851 fake_fs.clone(),
2852 &mut cx.to_async(),
2853 )
2854 .await
2855 .unwrap();
2856 let connection = NativeAgentConnection(agent.clone());
2857
2858 // Create a thread using new_thread
2859 let connection_rc = Rc::new(connection.clone());
2860 let acp_thread = cx
2861 .update(|cx| connection_rc.new_thread(project, cwd, cx))
2862 .await
2863 .expect("new_thread should succeed");
2864
2865 // Get the session_id from the AcpThread
2866 let session_id = acp_thread.read_with(cx, |thread, _| thread.session_id().clone());
2867
2868 // Test model_selector returns Some
2869 let selector_opt = connection.model_selector(&session_id);
2870 assert!(
2871 selector_opt.is_some(),
2872 "agent should always support ModelSelector"
2873 );
2874 let selector = selector_opt.unwrap();
2875
2876 // Test list_models
2877 let listed_models = cx
2878 .update(|cx| selector.list_models(cx))
2879 .await
2880 .expect("list_models should succeed");
2881 let AgentModelList::Grouped(listed_models) = listed_models else {
2882 panic!("Unexpected model list type");
2883 };
2884 assert!(!listed_models.is_empty(), "should have at least one model");
2885 assert_eq!(
2886 listed_models[&AgentModelGroupName("Fake".into())][0]
2887 .id
2888 .0
2889 .as_ref(),
2890 "fake/fake"
2891 );
2892
2893 // Test selected_model returns the default
2894 let model = cx
2895 .update(|cx| selector.selected_model(cx))
2896 .await
2897 .expect("selected_model should succeed");
2898 let model = cx
2899 .update(|cx| agent.read(cx).models().model_from_id(&model.id))
2900 .unwrap();
2901 let model = model.as_fake();
2902 assert_eq!(model.id().0, "fake", "should return default model");
2903
2904 let request = acp_thread.update(cx, |thread, cx| thread.send(vec!["abc".into()], cx));
2905 cx.run_until_parked();
2906 model.send_last_completion_stream_text_chunk("def");
2907 cx.run_until_parked();
2908 acp_thread.read_with(cx, |thread, cx| {
2909 assert_eq!(
2910 thread.to_markdown(cx),
2911 indoc! {"
2912 ## User
2913
2914 abc
2915
2916 ## Assistant
2917
2918 def
2919
2920 "}
2921 )
2922 });
2923
2924 // Test cancel
2925 cx.update(|cx| connection.cancel(&session_id, cx));
    request.await.expect("prompt should resolve after cancellation");
2927
2928 // Ensure that dropping the ACP thread causes the native thread to be
2929 // dropped as well.
2930 cx.update(|_| drop(acp_thread));
2931 let result = cx
2932 .update(|cx| {
2933 connection.prompt(
2934 Some(acp_thread::UserMessageId::new()),
2935 acp::PromptRequest::new(session_id.clone(), vec!["ghi".into()]),
2936 cx,
2937 )
2938 })
2939 .await;
2940 assert_eq!(
2941 result.as_ref().unwrap_err().to_string(),
2942 "Session not found",
2943 "unexpected result: {:?}",
2944 result
2945 );
2946}
2947
2948#[gpui::test]
2949async fn test_tool_updates_to_completion(cx: &mut TestAppContext) {
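    // Tests the sequence of tool call events emitted as tool input streams in, the tool runs, and it completes.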
2950 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
2951 thread.update(cx, |thread, _cx| thread.add_tool(ThinkingTool));
2952 let fake_model = model.as_fake();
2953
2954 let mut events = thread
2955 .update(cx, |thread, cx| {
2956 thread.send(UserMessageId::new(), ["Think"], cx)
2957 })
2958 .unwrap();
2959 cx.run_until_parked();
2960
2961 // Simulate streaming partial input.
2962 let input = json!({});
2963 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2964 LanguageModelToolUse {
2965 id: "1".into(),
2966 name: ThinkingTool::name().into(),
2967 raw_input: input.to_string(),
2968 input,
2969 is_input_complete: false,
2970 thought_signature: None,
2971 },
2972 ));
2973
2974 // Input streaming completed
2975 let input = json!({ "content": "Thinking hard!" });
2976 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2977 LanguageModelToolUse {
2978 id: "1".into(),
2979 name: "thinking".into(),
2980 raw_input: input.to_string(),
2981 input,
2982 is_input_complete: true,
2983 thought_signature: None,
2984 },
2985 ));
2986 fake_model.end_last_completion_stream();
2987 cx.run_until_parked();
2988
2989 let tool_call = expect_tool_call(&mut events).await;
2990 assert_eq!(
2991 tool_call,
2992 acp::ToolCall::new("1", "Thinking")
2993 .kind(acp::ToolKind::Think)
2994 .raw_input(json!({}))
2995 .meta(acp_thread::meta_with_tool_name("thinking"))
2996 );
2997 let update = expect_tool_call_update_fields(&mut events).await;
2998 assert_eq!(
2999 update,
3000 acp::ToolCallUpdate::new(
3001 "1",
3002 acp::ToolCallUpdateFields::new()
3003 .title("Thinking")
3004 .kind(acp::ToolKind::Think)
3005 .raw_input(json!({ "content": "Thinking hard!"}))
3006 )
3007 );
3008 let update = expect_tool_call_update_fields(&mut events).await;
3009 assert_eq!(
3010 update,
3011 acp::ToolCallUpdate::new(
3012 "1",
3013 acp::ToolCallUpdateFields::new().status(acp::ToolCallStatus::InProgress)
3014 )
3015 );
3016 let update = expect_tool_call_update_fields(&mut events).await;
3017 assert_eq!(
3018 update,
3019 acp::ToolCallUpdate::new(
3020 "1",
3021 acp::ToolCallUpdateFields::new().content(vec!["Thinking hard!".into()])
3022 )
3023 );
3024 let update = expect_tool_call_update_fields(&mut events).await;
3025 assert_eq!(
3026 update,
3027 acp::ToolCallUpdate::new(
3028 "1",
3029 acp::ToolCallUpdateFields::new()
3030 .status(acp::ToolCallStatus::Completed)
3031 .raw_output("Finished thinking.")
3032 )
3033 );
3034}
3035
3036#[gpui::test]
3037async fn test_send_no_retry_on_success(cx: &mut TestAppContext) {
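    // Tests that a turn which succeeds on the first attempt emits no Retry events.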
3038 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
3039 let fake_model = model.as_fake();
3040
3041 let mut events = thread
3042 .update(cx, |thread, cx| {
3043 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
3044 thread.send(UserMessageId::new(), ["Hello!"], cx)
3045 })
3046 .unwrap();
3047 cx.run_until_parked();
3048
3049 fake_model.send_last_completion_stream_text_chunk("Hey!");
3050 fake_model.end_last_completion_stream();
3051
3052 let mut retry_events = Vec::new();
3053 while let Some(Ok(event)) = events.next().await {
3054 match event {
3055 ThreadEvent::Retry(retry_status) => {
3056 retry_events.push(retry_status);
3057 }
3058 ThreadEvent::Stop(..) => break,
3059 _ => {}
3060 }
3061 }
3062
3063 assert_eq!(retry_events.len(), 0);
3064 thread.read_with(cx, |thread, _cx| {
3065 assert_eq!(
3066 thread.to_markdown(),
3067 indoc! {"
3068 ## User
3069
3070 Hello!
3071
3072 ## Assistant
3073
3074 Hey!
3075 "}
3076 )
3077 });
3078}
3079
3080#[gpui::test]
3081async fn test_send_retry_on_error(cx: &mut TestAppContext) {
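    // Tests that a retryable provider error triggers a single retry that resumes the interrupted turn.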
3082 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
3083 let fake_model = model.as_fake();
3084
3085 let mut events = thread
3086 .update(cx, |thread, cx| {
3087 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
3088 thread.send(UserMessageId::new(), ["Hello!"], cx)
3089 })
3090 .unwrap();
3091 cx.run_until_parked();
3092
3093 fake_model.send_last_completion_stream_text_chunk("Hey,");
3094 fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
3095 provider: LanguageModelProviderName::new("Anthropic"),
3096 retry_after: Some(Duration::from_secs(3)),
3097 });
3098 fake_model.end_last_completion_stream();
3099
3100 cx.executor().advance_clock(Duration::from_secs(3));
3101 cx.run_until_parked();
3102
3103 fake_model.send_last_completion_stream_text_chunk("there!");
3104 fake_model.end_last_completion_stream();
3105 cx.run_until_parked();
3106
3107 let mut retry_events = Vec::new();
3108 while let Some(Ok(event)) = events.next().await {
3109 match event {
3110 ThreadEvent::Retry(retry_status) => {
3111 retry_events.push(retry_status);
3112 }
3113 ThreadEvent::Stop(..) => break,
3114 _ => {}
3115 }
3116 }
3117
3118 assert_eq!(retry_events.len(), 1);
3119 assert!(matches!(
3120 retry_events[0],
3121 acp_thread::RetryStatus { attempt: 1, .. }
3122 ));
3123 thread.read_with(cx, |thread, _cx| {
3124 assert_eq!(
3125 thread.to_markdown(),
3126 indoc! {"
3127 ## User
3128
3129 Hello!
3130
3131 ## Assistant
3132
3133 Hey,
3134
3135 [resume]
3136
3137 ## Assistant
3138
3139 there!
3140 "}
3141 )
3142 });
3143}
3144
3145#[gpui::test]
3146async fn test_send_retry_finishes_tool_calls_on_error(cx: &mut TestAppContext) {
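    // Tests that tool calls streamed before a retryable error are resolved and their results included in the retried request.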
3147 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
3148 let fake_model = model.as_fake();
3149
3150 let events = thread
3151 .update(cx, |thread, cx| {
3152 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
3153 thread.add_tool(EchoTool);
3154 thread.send(UserMessageId::new(), ["Call the echo tool!"], cx)
3155 })
3156 .unwrap();
3157 cx.run_until_parked();
3158
3159 let tool_use_1 = LanguageModelToolUse {
3160 id: "tool_1".into(),
3161 name: EchoTool::name().into(),
3162 raw_input: json!({"text": "test"}).to_string(),
3163 input: json!({"text": "test"}),
3164 is_input_complete: true,
3165 thought_signature: None,
3166 };
3167 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
3168 tool_use_1.clone(),
3169 ));
3170 fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
3171 provider: LanguageModelProviderName::new("Anthropic"),
3172 retry_after: Some(Duration::from_secs(3)),
3173 });
3174 fake_model.end_last_completion_stream();
3175
3176 cx.executor().advance_clock(Duration::from_secs(3));
3177 let completion = fake_model.pending_completions().pop().unwrap();
3178 assert_eq!(
3179 completion.messages[1..],
3180 vec![
3181 LanguageModelRequestMessage {
3182 role: Role::User,
3183 content: vec!["Call the echo tool!".into()],
3184 cache: false,
3185 reasoning_details: None,
3186 },
3187 LanguageModelRequestMessage {
3188 role: Role::Assistant,
3189 content: vec![language_model::MessageContent::ToolUse(tool_use_1.clone())],
3190 cache: false,
3191 reasoning_details: None,
3192 },
3193 LanguageModelRequestMessage {
3194 role: Role::User,
3195 content: vec![language_model::MessageContent::ToolResult(
3196 LanguageModelToolResult {
3197 tool_use_id: tool_use_1.id.clone(),
3198 tool_name: tool_use_1.name.clone(),
3199 is_error: false,
3200 content: "test".into(),
3201 output: Some("test".into())
3202 }
3203 )],
3204 cache: true,
3205 reasoning_details: None,
3206 },
3207 ]
3208 );
3209
3210 fake_model.send_last_completion_stream_text_chunk("Done");
3211 fake_model.end_last_completion_stream();
3212 cx.run_until_parked();
3213 events.collect::<Vec<_>>().await;
3214 thread.read_with(cx, |thread, _cx| {
3215 assert_eq!(
3216 thread.last_message(),
3217 Some(Message::Agent(AgentMessage {
3218 content: vec![AgentMessageContent::Text("Done".into())],
3219 tool_results: IndexMap::default(),
3220 reasoning_details: None,
3221 }))
3222 );
3223 })
3224}
3225
3226#[gpui::test]
3227async fn test_send_max_retries_exceeded(cx: &mut TestAppContext) {
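    // Tests that retrying stops after MAX_RETRY_ATTEMPTS and the final error is surfaced to the caller.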
3228 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
3229 let fake_model = model.as_fake();
3230
3231 let mut events = thread
3232 .update(cx, |thread, cx| {
3233 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
3234 thread.send(UserMessageId::new(), ["Hello!"], cx)
3235 })
3236 .unwrap();
3237 cx.run_until_parked();
3238
3239 for _ in 0..crate::thread::MAX_RETRY_ATTEMPTS + 1 {
3240 fake_model.send_last_completion_stream_error(
3241 LanguageModelCompletionError::ServerOverloaded {
3242 provider: LanguageModelProviderName::new("Anthropic"),
3243 retry_after: Some(Duration::from_secs(3)),
3244 },
3245 );
3246 fake_model.end_last_completion_stream();
3247 cx.executor().advance_clock(Duration::from_secs(3));
3248 cx.run_until_parked();
3249 }
3250
3251 let mut errors = Vec::new();
3252 let mut retry_events = Vec::new();
3253 while let Some(event) = events.next().await {
3254 match event {
3255 Ok(ThreadEvent::Retry(retry_status)) => {
3256 retry_events.push(retry_status);
3257 }
3258 Ok(ThreadEvent::Stop(..)) => break,
3259 Err(error) => errors.push(error),
3260 _ => {}
3261 }
3262 }
3263
3264 assert_eq!(
3265 retry_events.len(),
3266 crate::thread::MAX_RETRY_ATTEMPTS as usize
3267 );
3268 for i in 0..crate::thread::MAX_RETRY_ATTEMPTS as usize {
3269 assert_eq!(retry_events[i].attempt, i + 1);
3270 }
3271 assert_eq!(errors.len(), 1);
3272 let error = errors[0]
3273 .downcast_ref::<LanguageModelCompletionError>()
3274 .unwrap();
3275 assert!(matches!(
3276 error,
3277 LanguageModelCompletionError::ServerOverloaded { .. }
3278 ));
3279}
3280
/// Extracts the stop events from a list of thread events for asserting against in tests
3282fn stop_events(result_events: Vec<Result<ThreadEvent>>) -> Vec<acp::StopReason> {
3283 result_events
3284 .into_iter()
3285 .filter_map(|event| match event.unwrap() {
3286 ThreadEvent::Stop(stop_reason) => Some(stop_reason),
3287 _ => None,
3288 })
3289 .collect()
3290}
3291
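/// Entities and handles produced by `setup` for driving a thread in tests.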
3292struct ThreadTest {
3293 model: Arc<dyn LanguageModel>,
3294 thread: Entity<Thread>,
3295 project_context: Entity<ProjectContext>,
3296 context_server_store: Entity<ContextServerStore>,
3297 fs: Arc<FakeFs>,
3298}
3299
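/// Which language model a test runs against: the in-process fake model or a real Claude Sonnet 4 provider.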
3300enum TestModel {
3301 Sonnet4,
3302 Fake,
3303}
3304
3305impl TestModel {
3306 fn id(&self) -> LanguageModelId {
3307 match self {
3308 TestModel::Sonnet4 => LanguageModelId("claude-sonnet-4-latest".into()),
3309 TestModel::Fake => unreachable!(),
3310 }
3311 }
3312}
3313
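/// Builds a test `Thread` backed by the requested model, with a settings profile that enables all test tools.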
3314async fn setup(cx: &mut TestAppContext, model: TestModel) -> ThreadTest {
3315 cx.executor().allow_parking();
3316
3317 let fs = FakeFs::new(cx.background_executor.clone());
3318 fs.create_dir(paths::settings_file().parent().unwrap())
3319 .await
3320 .unwrap();
3321 fs.insert_file(
3322 paths::settings_file(),
3323 json!({
3324 "agent": {
3325 "default_profile": "test-profile",
3326 "profiles": {
3327 "test-profile": {
3328 "name": "Test Profile",
3329 "tools": {
3330 EchoTool::name(): true,
3331 DelayTool::name(): true,
3332 WordListTool::name(): true,
3333 ToolRequiringPermission::name(): true,
3334 InfiniteTool::name(): true,
3335 CancellationAwareTool::name(): true,
3336 ThinkingTool::name(): true,
3337 "terminal": true,
3338 }
3339 }
3340 }
3341 }
3342 })
3343 .to_string()
3344 .into_bytes(),
3345 )
3346 .await;
3347
3348 cx.update(|cx| {
3349 settings::init(cx);
3350
3351 match model {
3352 TestModel::Fake => {}
3353 TestModel::Sonnet4 => {
3354 gpui_tokio::init(cx);
3355 let http_client = ReqwestClient::user_agent("agent tests").unwrap();
3356 cx.set_http_client(Arc::new(http_client));
3357 let client = Client::production(cx);
3358 let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
3359 language_model::init(client.clone(), cx);
3360 language_models::init(user_store, client.clone(), cx);
3361 }
3362 };
3363
3364 watch_settings(fs.clone(), cx);
3365 });
3366
3367 let templates = Templates::new();
3368
3369 fs.insert_tree(path!("/test"), json!({})).await;
3370 let project = Project::test(fs.clone(), [path!("/test").as_ref()], cx).await;
3371
3372 let model = cx
3373 .update(|cx| {
3374 if let TestModel::Fake = model {
3375 Task::ready(Arc::new(FakeLanguageModel::default()) as Arc<_>)
3376 } else {
3377 let model_id = model.id();
3378 let models = LanguageModelRegistry::read_global(cx);
3379 let model = models
3380 .available_models(cx)
3381 .find(|model| model.id() == model_id)
3382 .unwrap();
3383
3384 let provider = models.provider(&model.provider_id()).unwrap();
3385 let authenticated = provider.authenticate(cx);
3386
3387 cx.spawn(async move |_cx| {
3388 authenticated.await.unwrap();
3389 model
3390 })
3391 }
3392 })
3393 .await;
3394
3395 let project_context = cx.new(|_cx| ProjectContext::default());
3396 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
3397 let context_server_registry =
3398 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
3399 let thread = cx.new(|cx| {
3400 Thread::new(
3401 project,
3402 project_context.clone(),
3403 context_server_registry,
3404 templates,
3405 Some(model.clone()),
3406 cx,
3407 )
3408 });
3409 ThreadTest {
3410 model,
3411 thread,
3412 project_context,
3413 context_server_store,
3414 fs,
3415 }
3416}
3417
3418#[cfg(test)]
3419#[ctor::ctor]
3420fn init_logger() {
3421 if std::env::var("RUST_LOG").is_ok() {
3422 env_logger::init();
3423 }
3424}
3425
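/// Keeps the global `SettingsStore` in sync with changes to the settings file on the fake filesystem.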
3426fn watch_settings(fs: Arc<dyn Fs>, cx: &mut App) {
3427 let fs = fs.clone();
3428 cx.spawn({
3429 async move |cx| {
3430 let mut new_settings_content_rx = settings::watch_config_file(
3431 cx.background_executor(),
3432 fs,
3433 paths::settings_file().clone(),
3434 );
3435
3436 while let Some(new_settings_content) = new_settings_content_rx.next().await {
3437 cx.update(|cx| {
3438 SettingsStore::update_global(cx, |settings, cx| {
3439 settings.set_user_settings(&new_settings_content, cx)
3440 })
3441 })
3442 .ok();
3443 }
3444 }
3445 })
3446 .detach();
3447}
3448
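/// Returns the names of the tools included in a completion request.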
3449fn tool_names_for_completion(completion: &LanguageModelRequest) -> Vec<String> {
3450 completion
3451 .tools
3452 .iter()
3453 .map(|tool| tool.name.clone())
3454 .collect()
3455}
3456
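/// Starts a fake MCP context server exposing the given tools and returns a receiver of incoming
/// tool calls paired with channels for sending responses.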
3457fn setup_context_server(
3458 name: &'static str,
3459 tools: Vec<context_server::types::Tool>,
3460 context_server_store: &Entity<ContextServerStore>,
3461 cx: &mut TestAppContext,
3462) -> mpsc::UnboundedReceiver<(
3463 context_server::types::CallToolParams,
3464 oneshot::Sender<context_server::types::CallToolResponse>,
3465)> {
3466 cx.update(|cx| {
3467 let mut settings = ProjectSettings::get_global(cx).clone();
3468 settings.context_servers.insert(
3469 name.into(),
3470 project::project_settings::ContextServerSettings::Stdio {
3471 enabled: true,
3472 command: ContextServerCommand {
3473 path: "somebinary".into(),
3474 args: Vec::new(),
3475 env: None,
3476 timeout: None,
3477 },
3478 },
3479 );
3480 ProjectSettings::override_global(settings, cx);
3481 });
3482
3483 let (mcp_tool_calls_tx, mcp_tool_calls_rx) = mpsc::unbounded();
3484 let fake_transport = context_server::test::create_fake_transport(name, cx.executor())
3485 .on_request::<context_server::types::requests::Initialize, _>(move |_params| async move {
3486 context_server::types::InitializeResponse {
3487 protocol_version: context_server::types::ProtocolVersion(
3488 context_server::types::LATEST_PROTOCOL_VERSION.to_string(),
3489 ),
3490 server_info: context_server::types::Implementation {
3491 name: name.into(),
3492 version: "1.0.0".to_string(),
3493 },
3494 capabilities: context_server::types::ServerCapabilities {
3495 tools: Some(context_server::types::ToolsCapabilities {
3496 list_changed: Some(true),
3497 }),
3498 ..Default::default()
3499 },
3500 meta: None,
3501 }
3502 })
3503 .on_request::<context_server::types::requests::ListTools, _>(move |_params| {
3504 let tools = tools.clone();
3505 async move {
3506 context_server::types::ListToolsResponse {
3507 tools,
3508 next_cursor: None,
3509 meta: None,
3510 }
3511 }
3512 })
3513 .on_request::<context_server::types::requests::CallTool, _>(move |params| {
3514 let mcp_tool_calls_tx = mcp_tool_calls_tx.clone();
3515 async move {
3516 let (response_tx, response_rx) = oneshot::channel();
3517 mcp_tool_calls_tx
3518 .unbounded_send((params, response_tx))
3519 .unwrap();
3520 response_rx.await.unwrap()
3521 }
3522 });
3523 context_server_store.update(cx, |store, cx| {
3524 store.start_server(
3525 Arc::new(ContextServer::new(
3526 ContextServerId(name.into()),
3527 Arc::new(fake_transport),
3528 )),
3529 cx,
3530 );
3531 });
3532 cx.run_until_parked();
3533 mcp_tool_calls_rx
3534}
3535
3536#[gpui::test]
3537async fn test_tokens_before_message(cx: &mut TestAppContext) {
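    // Tests that tokens_before_message reports the input token count of the request that preceded each user message.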
3538 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
3539 let fake_model = model.as_fake();
3540
3541 // First message
3542 let message_1_id = UserMessageId::new();
3543 thread
3544 .update(cx, |thread, cx| {
3545 thread.send(message_1_id.clone(), ["First message"], cx)
3546 })
3547 .unwrap();
3548 cx.run_until_parked();
3549
    // Before any response, tokens_before_message should return None for the first message
3551 thread.read_with(cx, |thread, _| {
3552 assert_eq!(
3553 thread.tokens_before_message(&message_1_id),
3554 None,
3555 "First message should have no tokens before it"
3556 );
3557 });
3558
3559 // Complete first message with usage
3560 fake_model.send_last_completion_stream_text_chunk("Response 1");
3561 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
3562 language_model::TokenUsage {
3563 input_tokens: 100,
3564 output_tokens: 50,
3565 cache_creation_input_tokens: 0,
3566 cache_read_input_tokens: 0,
3567 },
3568 ));
3569 fake_model.end_last_completion_stream();
3570 cx.run_until_parked();
3571
3572 // First message still has no tokens before it
3573 thread.read_with(cx, |thread, _| {
3574 assert_eq!(
3575 thread.tokens_before_message(&message_1_id),
3576 None,
3577 "First message should still have no tokens before it after response"
3578 );
3579 });
3580
3581 // Second message
3582 let message_2_id = UserMessageId::new();
3583 thread
3584 .update(cx, |thread, cx| {
3585 thread.send(message_2_id.clone(), ["Second message"], cx)
3586 })
3587 .unwrap();
3588 cx.run_until_parked();
3589
3590 // Second message should have first message's input tokens before it
3591 thread.read_with(cx, |thread, _| {
3592 assert_eq!(
3593 thread.tokens_before_message(&message_2_id),
3594 Some(100),
3595 "Second message should have 100 tokens before it (from first request)"
3596 );
3597 });
3598
3599 // Complete second message
3600 fake_model.send_last_completion_stream_text_chunk("Response 2");
3601 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
3602 language_model::TokenUsage {
3603 input_tokens: 250, // Total for this request (includes previous context)
3604 output_tokens: 75,
3605 cache_creation_input_tokens: 0,
3606 cache_read_input_tokens: 0,
3607 },
3608 ));
3609 fake_model.end_last_completion_stream();
3610 cx.run_until_parked();
3611
3612 // Third message
3613 let message_3_id = UserMessageId::new();
3614 thread
3615 .update(cx, |thread, cx| {
3616 thread.send(message_3_id.clone(), ["Third message"], cx)
3617 })
3618 .unwrap();
3619 cx.run_until_parked();
3620
3621 // Third message should have second message's input tokens (250) before it
3622 thread.read_with(cx, |thread, _| {
3623 assert_eq!(
3624 thread.tokens_before_message(&message_3_id),
3625 Some(250),
3626 "Third message should have 250 tokens before it (from second request)"
3627 );
3628 // Second message should still have 100
3629 assert_eq!(
3630 thread.tokens_before_message(&message_2_id),
3631 Some(100),
3632 "Second message should still have 100 tokens before it"
3633 );
3634 // First message still has none
3635 assert_eq!(
3636 thread.tokens_before_message(&message_1_id),
3637 None,
3638 "First message should still have no tokens before it"
3639 );
3640 });
3641}
3642
3643#[gpui::test]
3644async fn test_tokens_before_message_after_truncate(cx: &mut TestAppContext) {
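    // Tests that tokens_before_message returns None for messages that were removed by truncation.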
3645 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
3646 let fake_model = model.as_fake();
3647
3648 // Set up three messages with responses
3649 let message_1_id = UserMessageId::new();
3650 thread
3651 .update(cx, |thread, cx| {
3652 thread.send(message_1_id.clone(), ["Message 1"], cx)
3653 })
3654 .unwrap();
3655 cx.run_until_parked();
3656 fake_model.send_last_completion_stream_text_chunk("Response 1");
3657 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
3658 language_model::TokenUsage {
3659 input_tokens: 100,
3660 output_tokens: 50,
3661 cache_creation_input_tokens: 0,
3662 cache_read_input_tokens: 0,
3663 },
3664 ));
3665 fake_model.end_last_completion_stream();
3666 cx.run_until_parked();
3667
3668 let message_2_id = UserMessageId::new();
3669 thread
3670 .update(cx, |thread, cx| {
3671 thread.send(message_2_id.clone(), ["Message 2"], cx)
3672 })
3673 .unwrap();
3674 cx.run_until_parked();
3675 fake_model.send_last_completion_stream_text_chunk("Response 2");
3676 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
3677 language_model::TokenUsage {
3678 input_tokens: 250,
3679 output_tokens: 75,
3680 cache_creation_input_tokens: 0,
3681 cache_read_input_tokens: 0,
3682 },
3683 ));
3684 fake_model.end_last_completion_stream();
3685 cx.run_until_parked();
3686
3687 // Verify initial state
3688 thread.read_with(cx, |thread, _| {
3689 assert_eq!(thread.tokens_before_message(&message_2_id), Some(100));
3690 });
3691
3692 // Truncate at message 2 (removes message 2 and everything after)
3693 thread
3694 .update(cx, |thread, cx| thread.truncate(message_2_id.clone(), cx))
3695 .unwrap();
3696 cx.run_until_parked();
3697
3698 // After truncation, message_2_id no longer exists, so lookup should return None
3699 thread.read_with(cx, |thread, _| {
3700 assert_eq!(
3701 thread.tokens_before_message(&message_2_id),
3702 None,
3703 "After truncation, message 2 no longer exists"
3704 );
3705 // Message 1 still exists but has no tokens before it
3706 assert_eq!(
3707 thread.tokens_before_message(&message_1_id),
3708 None,
3709 "First message still has no tokens before it"
3710 );
3711 });
3712}
3713
3714#[gpui::test]
3715async fn test_terminal_tool_permission_rules(cx: &mut TestAppContext) {
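    // Tests terminal tool permission rules: deny patterns block commands, allow patterns skip
    // confirmation, confirm patterns force confirmation, and default_mode applies when no pattern matches.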
3716 init_test(cx);
3717
3718 let fs = FakeFs::new(cx.executor());
3719 fs.insert_tree("/root", json!({})).await;
3720 let project = Project::test(fs, ["/root".as_ref()], cx).await;
3721
3722 // Test 1: Deny rule blocks command
3723 {
3724 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
3725 let environment = Rc::new(FakeThreadEnvironment {
3726 handle: handle.clone(),
3727 });
3728
3729 cx.update(|cx| {
3730 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
3731 settings.tool_permissions.tools.insert(
3732 "terminal".into(),
3733 agent_settings::ToolRules {
3734 default_mode: settings::ToolPermissionMode::Confirm,
3735 always_allow: vec![],
3736 always_deny: vec![
3737 agent_settings::CompiledRegex::new(r"rm\s+-rf", false).unwrap(),
3738 ],
3739 always_confirm: vec![],
3740 invalid_patterns: vec![],
3741 },
3742 );
3743 agent_settings::AgentSettings::override_global(settings, cx);
3744 });
3745
3746 #[allow(clippy::arc_with_non_send_sync)]
3747 let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
3748 let (event_stream, _rx) = crate::ToolCallEventStream::test();
3749
3750 let task = cx.update(|cx| {
3751 tool.run(
3752 crate::TerminalToolInput {
3753 command: "rm -rf /".to_string(),
3754 cd: ".".to_string(),
3755 timeout_ms: None,
3756 },
3757 event_stream,
3758 cx,
3759 )
3760 });
3761
3762 let result = task.await;
3763 assert!(
3764 result.is_err(),
3765 "expected command to be blocked by deny rule"
3766 );
3767 assert!(
3768 result.unwrap_err().to_string().contains("blocked"),
3769 "error should mention the command was blocked"
3770 );
3771 }
3772
3773 // Test 2: Allow rule skips confirmation (and overrides default_mode: Deny)
3774 {
3775 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_with_immediate_exit(cx, 0)));
3776 let environment = Rc::new(FakeThreadEnvironment {
3777 handle: handle.clone(),
3778 });
3779
3780 cx.update(|cx| {
3781 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
3782 settings.always_allow_tool_actions = false;
3783 settings.tool_permissions.tools.insert(
3784 "terminal".into(),
3785 agent_settings::ToolRules {
3786 default_mode: settings::ToolPermissionMode::Deny,
3787 always_allow: vec![
3788 agent_settings::CompiledRegex::new(r"^echo\s", false).unwrap(),
3789 ],
3790 always_deny: vec![],
3791 always_confirm: vec![],
3792 invalid_patterns: vec![],
3793 },
3794 );
3795 agent_settings::AgentSettings::override_global(settings, cx);
3796 });
3797
3798 #[allow(clippy::arc_with_non_send_sync)]
3799 let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
3800 let (event_stream, mut rx) = crate::ToolCallEventStream::test();
3801
3802 let task = cx.update(|cx| {
3803 tool.run(
3804 crate::TerminalToolInput {
3805 command: "echo hello".to_string(),
3806 cd: ".".to_string(),
3807 timeout_ms: None,
3808 },
3809 event_stream,
3810 cx,
3811 )
3812 });
3813
3814 let update = rx.expect_update_fields().await;
3815 assert!(
3816 update.content.iter().any(|blocks| {
3817 blocks
3818 .iter()
3819 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
3820 }),
3821 "expected terminal content (allow rule should skip confirmation and override default deny)"
3822 );
3823
3824 let result = task.await;
3825 assert!(
3826 result.is_ok(),
3827 "expected command to succeed without confirmation"
3828 );
3829 }
3830
3831 // Test 3: Confirm rule forces confirmation even with always_allow_tool_actions=true
3832 {
3833 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_with_immediate_exit(cx, 0)));
3834 let environment = Rc::new(FakeThreadEnvironment {
3835 handle: handle.clone(),
3836 });
3837
3838 cx.update(|cx| {
3839 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
3840 settings.always_allow_tool_actions = true;
3841 settings.tool_permissions.tools.insert(
3842 "terminal".into(),
3843 agent_settings::ToolRules {
3844 default_mode: settings::ToolPermissionMode::Allow,
3845 always_allow: vec![],
3846 always_deny: vec![],
3847 always_confirm: vec![
3848 agent_settings::CompiledRegex::new(r"sudo", false).unwrap(),
3849 ],
3850 invalid_patterns: vec![],
3851 },
3852 );
3853 agent_settings::AgentSettings::override_global(settings, cx);
3854 });
3855
3856 #[allow(clippy::arc_with_non_send_sync)]
3857 let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
3858 let (event_stream, mut rx) = crate::ToolCallEventStream::test();
3859
3860 let _task = cx.update(|cx| {
3861 tool.run(
3862 crate::TerminalToolInput {
3863 command: "sudo rm file".to_string(),
3864 cd: ".".to_string(),
3865 timeout_ms: None,
3866 },
3867 event_stream,
3868 cx,
3869 )
3870 });
3871
3872 let auth = rx.expect_authorization().await;
3873 assert!(
3874 auth.tool_call.fields.title.is_some(),
3875 "expected authorization request for sudo command despite always_allow_tool_actions=true"
3876 );
3877 }
3878
3879    // Test 4: default_mode: Deny blocks commands when no pattern matches, even with always_allow_tool_actions=true
3880 {
3881 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
3882 let environment = Rc::new(FakeThreadEnvironment {
3883 handle: handle.clone(),
3884 });
3885
3886 cx.update(|cx| {
3887 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
3888 settings.always_allow_tool_actions = true;
3889 settings.tool_permissions.tools.insert(
3890 "terminal".into(),
3891 agent_settings::ToolRules {
3892 default_mode: settings::ToolPermissionMode::Deny,
3893 always_allow: vec![],
3894 always_deny: vec![],
3895 always_confirm: vec![],
3896 invalid_patterns: vec![],
3897 },
3898 );
3899 agent_settings::AgentSettings::override_global(settings, cx);
3900 });
3901
3902 #[allow(clippy::arc_with_non_send_sync)]
3903 let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
3904 let (event_stream, _rx) = crate::ToolCallEventStream::test();
3905
3906 let task = cx.update(|cx| {
3907 tool.run(
3908 crate::TerminalToolInput {
3909 command: "echo hello".to_string(),
3910 cd: ".".to_string(),
3911 timeout_ms: None,
3912 },
3913 event_stream,
3914 cx,
3915 )
3916 });
3917
3918 let result = task.await;
3919 assert!(
3920 result.is_err(),
3921 "expected command to be blocked by default_mode: Deny"
3922 );
3923 assert!(
3924 result.unwrap_err().to_string().contains("disabled"),
3925 "error should mention the tool is disabled"
3926 );
3927 }
3928}
3929
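// add_default_tools should register the subagent tool when the "subagents" feature
// flag is enabled.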
3930#[gpui::test]
3931async fn test_subagent_tool_is_present_when_feature_flag_enabled(cx: &mut TestAppContext) {
3932 init_test(cx);
3933
3934 cx.update(|cx| {
3935 cx.update_flags(true, vec!["subagents".to_string()]);
3936 });
3937
3938 let fs = FakeFs::new(cx.executor());
3939 fs.insert_tree(path!("/test"), json!({})).await;
3940 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
3941 let project_context = cx.new(|_cx| ProjectContext::default());
3942 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
3943 let context_server_registry =
3944 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
3945 let model = Arc::new(FakeLanguageModel::default());
3946
3947 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
3948 let environment = Rc::new(FakeThreadEnvironment { handle });
3949
3950 let thread = cx.new(|cx| {
3951 let mut thread = Thread::new(
3952 project.clone(),
3953 project_context,
3954 context_server_registry,
3955 Templates::new(),
3956 Some(model),
3957 cx,
3958 );
3959 thread.add_default_tools(environment, cx);
3960 thread
3961 });
3962
3963 thread.read_with(cx, |thread, _| {
3964 assert!(
3965 thread.has_registered_tool("subagent"),
3966 "subagent tool should be present when feature flag is enabled"
3967 );
3968 });
3969}
3970
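// With the "subagents" feature flag disabled, add_default_tools must not register the
// subagent tool.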
3971#[gpui::test]
3972async fn test_subagent_tool_is_absent_when_feature_flag_disabled(cx: &mut TestAppContext) {
3973 init_test(cx);
3974
3975 cx.update(|cx| {
3976 cx.update_flags(false, vec![]);
3977 });
3978
3979 let fs = FakeFs::new(cx.executor());
3980 fs.insert_tree(path!("/test"), json!({})).await;
3981 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
3982 let project_context = cx.new(|_cx| ProjectContext::default());
3983 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
3984 let context_server_registry =
3985 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
3986 let model = Arc::new(FakeLanguageModel::default());
3987
3988 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
3989 let environment = Rc::new(FakeThreadEnvironment { handle });
3990
3991 let thread = cx.new(|cx| {
3992 let mut thread = Thread::new(
3993 project.clone(),
3994 project_context,
3995 context_server_registry,
3996 Templates::new(),
3997 Some(model),
3998 cx,
3999 );
4000 thread.add_default_tools(environment, cx);
4001 thread
4002 });
4003
4004 thread.read_with(cx, |thread, _| {
4005 assert!(
4006 !thread.has_registered_tool("subagent"),
4007 "subagent tool should not be present when feature flag is disabled"
4008 );
4009 });
4010}
4011
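// A thread built with Thread::new_subagent should identify as a subagent, report the
// depth from its SubagentContext, and carry the model it was constructed with.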
4012#[gpui::test]
4013async fn test_subagent_thread_inherits_parent_model(cx: &mut TestAppContext) {
4014 init_test(cx);
4015
4016 cx.update(|cx| {
4017 cx.update_flags(true, vec!["subagents".to_string()]);
4018 });
4019
4020 let fs = FakeFs::new(cx.executor());
4021 fs.insert_tree(path!("/test"), json!({})).await;
4022 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
4023 let project_context = cx.new(|_cx| ProjectContext::default());
4024 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4025 let context_server_registry =
4026 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4027 let model = Arc::new(FakeLanguageModel::default());
4028
4029 let subagent_context = SubagentContext {
4030 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4031 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4032 depth: 1,
4033 summary_prompt: "Summarize".to_string(),
4034 context_low_prompt: "Context low".to_string(),
4035 };
4036
4037 let subagent = cx.new(|cx| {
4038 Thread::new_subagent(
4039 project.clone(),
4040 project_context,
4041 context_server_registry,
4042 Templates::new(),
4043 model.clone(),
4044 subagent_context,
4045 cx,
4046 )
4047 });
4048
4049 subagent.read_with(cx, |thread, _| {
4050 assert!(thread.is_subagent());
4051 assert_eq!(thread.depth(), 1);
4052 assert!(thread.model().is_some());
4053 });
4054}
4055
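// At MAX_SUBAGENT_DEPTH, add_default_tools must not register the subagent tool,
// which prevents further nesting.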
4056#[gpui::test]
4057async fn test_max_subagent_depth_prevents_tool_registration(cx: &mut TestAppContext) {
4058 init_test(cx);
4059
4060 cx.update(|cx| {
4061 cx.update_flags(true, vec!["subagents".to_string()]);
4062 });
4063
4064 let fs = FakeFs::new(cx.executor());
4065 fs.insert_tree(path!("/test"), json!({})).await;
4066 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
4067 let project_context = cx.new(|_cx| ProjectContext::default());
4068 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4069 let context_server_registry =
4070 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4071 let model = Arc::new(FakeLanguageModel::default());
4072
4073 let subagent_context = SubagentContext {
4074 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4075 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4076 depth: MAX_SUBAGENT_DEPTH,
4077 summary_prompt: "Summarize".to_string(),
4078 context_low_prompt: "Context low".to_string(),
4079 };
4080
4081 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
4082 let environment = Rc::new(FakeThreadEnvironment { handle });
4083
4084 let deep_subagent = cx.new(|cx| {
4085 let mut thread = Thread::new_subagent(
4086 project.clone(),
4087 project_context,
4088 context_server_registry,
4089 Templates::new(),
4090 model.clone(),
4091 subagent_context,
4092 cx,
4093 );
4094 thread.add_default_tools(environment, cx);
4095 thread
4096 });
4097
4098 deep_subagent.read_with(cx, |thread, _| {
4099 assert_eq!(thread.depth(), MAX_SUBAGENT_DEPTH);
4100 assert!(
4101 !thread.has_registered_tool("subagent"),
4102 "subagent tool should not be present at max depth"
4103 );
4104 });
4105}
4106
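// Submitting a task prompt to a subagent should produce exactly one user message, and
// that message should contain the task prompt in the request sent to the model.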
4107#[gpui::test]
4108async fn test_subagent_receives_task_prompt(cx: &mut TestAppContext) {
4109 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
4110 let fake_model = model.as_fake();
4111
4112 cx.update(|cx| {
4113 cx.update_flags(true, vec!["subagents".to_string()]);
4114 });
4115
4116 let subagent_context = SubagentContext {
4117 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4118 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4119 depth: 1,
4120 summary_prompt: "Summarize your work".to_string(),
4121 context_low_prompt: "Context low, wrap up".to_string(),
4122 };
4123
4124 let project = thread.read_with(cx, |t, _| t.project.clone());
4125 let project_context = cx.new(|_cx| ProjectContext::default());
4126 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4127 let context_server_registry =
4128 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4129
4130 let subagent = cx.new(|cx| {
4131 Thread::new_subagent(
4132 project.clone(),
4133 project_context,
4134 context_server_registry,
4135 Templates::new(),
4136 model.clone(),
4137 subagent_context,
4138 cx,
4139 )
4140 });
4141
4142 let task_prompt = "Find all TODO comments in the codebase";
4143 subagent
4144 .update(cx, |thread, cx| thread.submit_user_message(task_prompt, cx))
4145 .unwrap();
4146 cx.run_until_parked();
4147
4148 let pending = fake_model.pending_completions();
4149 assert_eq!(pending.len(), 1, "should have one pending completion");
4150
4151 let messages = &pending[0].messages;
4152 let user_messages: Vec<_> = messages
4153 .iter()
4154 .filter(|m| m.role == language_model::Role::User)
4155 .collect();
4156 assert_eq!(user_messages.len(), 1, "should have one user message");
4157
4158 let content = &user_messages[0].content[0];
4159 assert!(
4160 content.to_str().unwrap().contains("TODO"),
4161 "task prompt should be in user message"
4162 );
4163}
4164
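// After the subagent's turn ends, request_final_summary should send the configured
// summary_prompt to the model as the last user message.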
4165#[gpui::test]
4166async fn test_subagent_returns_summary_on_completion(cx: &mut TestAppContext) {
4167 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
4168 let fake_model = model.as_fake();
4169
4170 cx.update(|cx| {
4171 cx.update_flags(true, vec!["subagents".to_string()]);
4172 });
4173
4174 let subagent_context = SubagentContext {
4175 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4176 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4177 depth: 1,
4178 summary_prompt: "Please summarize what you found".to_string(),
4179 context_low_prompt: "Context low, wrap up".to_string(),
4180 };
4181
4182 let project = thread.read_with(cx, |t, _| t.project.clone());
4183 let project_context = cx.new(|_cx| ProjectContext::default());
4184 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4185 let context_server_registry =
4186 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4187
4188 let subagent = cx.new(|cx| {
4189 Thread::new_subagent(
4190 project.clone(),
4191 project_context,
4192 context_server_registry,
4193 Templates::new(),
4194 model.clone(),
4195 subagent_context,
4196 cx,
4197 )
4198 });
4199
4200 subagent
4201 .update(cx, |thread, cx| {
4202 thread.submit_user_message("Do some work", cx)
4203 })
4204 .unwrap();
4205 cx.run_until_parked();
4206
4207 fake_model.send_last_completion_stream_text_chunk("I did the work");
4208 fake_model
4209 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
4210 fake_model.end_last_completion_stream();
4211 cx.run_until_parked();
4212
4213 subagent
4214 .update(cx, |thread, cx| thread.request_final_summary(cx))
4215 .unwrap();
4216 cx.run_until_parked();
4217
4218 let pending = fake_model.pending_completions();
4219 assert!(
4220 !pending.is_empty(),
4221 "should have pending completion for summary"
4222 );
4223
4224 let messages = &pending.last().unwrap().messages;
4225 let user_messages: Vec<_> = messages
4226 .iter()
4227 .filter(|m| m.role == language_model::Role::User)
4228 .collect();
4229
4230 let last_user = user_messages.last().unwrap();
4231 assert!(
4232 last_user.content[0].to_str().unwrap().contains("summarize"),
4233 "summary prompt should be sent"
4234 );
4235}
4236
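// restrict_tools should keep only the tools named in the allowed set and deregister
// everything else.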
4237#[gpui::test]
4238async fn test_allowed_tools_restricts_subagent_capabilities(cx: &mut TestAppContext) {
4239 init_test(cx);
4240
4241 cx.update(|cx| {
4242 cx.update_flags(true, vec!["subagents".to_string()]);
4243 });
4244
4245 let fs = FakeFs::new(cx.executor());
4246 fs.insert_tree(path!("/test"), json!({})).await;
4247 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
4248 let project_context = cx.new(|_cx| ProjectContext::default());
4249 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4250 let context_server_registry =
4251 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4252 let model = Arc::new(FakeLanguageModel::default());
4253
4254 let subagent_context = SubagentContext {
4255 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4256 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4257 depth: 1,
4258 summary_prompt: "Summarize".to_string(),
4259 context_low_prompt: "Context low".to_string(),
4260 };
4261
4262 let subagent = cx.new(|cx| {
4263 let mut thread = Thread::new_subagent(
4264 project.clone(),
4265 project_context,
4266 context_server_registry,
4267 Templates::new(),
4268 model.clone(),
4269 subagent_context,
4270 cx,
4271 );
4272 thread.add_tool(EchoTool);
4273 thread.add_tool(DelayTool);
4274 thread.add_tool(WordListTool);
4275 thread
4276 });
4277
4278 subagent.read_with(cx, |thread, _| {
4279 assert!(thread.has_registered_tool("echo"));
4280 assert!(thread.has_registered_tool("delay"));
4281 assert!(thread.has_registered_tool("word_list"));
4282 });
4283
4284 let allowed: collections::HashSet<gpui::SharedString> =
4285 vec!["echo".into()].into_iter().collect();
4286
4287 subagent.update(cx, |thread, _cx| {
4288 thread.restrict_tools(&allowed);
4289 });
4290
4291 subagent.read_with(cx, |thread, _| {
4292 assert!(
4293 thread.has_registered_tool("echo"),
4294 "echo should still be available"
4295 );
4296 assert!(
4297 !thread.has_registered_tool("delay"),
4298 "delay should be removed"
4299 );
4300 assert!(
4301 !thread.has_registered_tool("word_list"),
4302 "word_list should be removed"
4303 );
4304 });
4305}
4306
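// Cancelling the parent thread should also cancel any subagents registered as running
// on it.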
4307#[gpui::test]
4308async fn test_parent_cancel_stops_subagent(cx: &mut TestAppContext) {
4309 init_test(cx);
4310
4311 cx.update(|cx| {
4312 cx.update_flags(true, vec!["subagents".to_string()]);
4313 });
4314
4315 let fs = FakeFs::new(cx.executor());
4316 fs.insert_tree(path!("/test"), json!({})).await;
4317 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
4318 let project_context = cx.new(|_cx| ProjectContext::default());
4319 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4320 let context_server_registry =
4321 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4322 let model = Arc::new(FakeLanguageModel::default());
4323
4324 let parent = cx.new(|cx| {
4325 Thread::new(
4326 project.clone(),
4327 project_context.clone(),
4328 context_server_registry.clone(),
4329 Templates::new(),
4330 Some(model.clone()),
4331 cx,
4332 )
4333 });
4334
4335 let subagent_context = SubagentContext {
4336 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4337 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4338 depth: 1,
4339 summary_prompt: "Summarize".to_string(),
4340 context_low_prompt: "Context low".to_string(),
4341 };
4342
4343 let subagent = cx.new(|cx| {
4344 Thread::new_subagent(
4345 project.clone(),
4346 project_context.clone(),
4347 context_server_registry.clone(),
4348 Templates::new(),
4349 model.clone(),
4350 subagent_context,
4351 cx,
4352 )
4353 });
4354
4355 parent.update(cx, |thread, _cx| {
4356 thread.register_running_subagent(subagent.downgrade());
4357 });
4358
4359 subagent
4360 .update(cx, |thread, cx| thread.submit_user_message("Do work", cx))
4361 .unwrap();
4362 cx.run_until_parked();
4363
4364 subagent.read_with(cx, |thread, _| {
4365 assert!(!thread.is_turn_complete(), "subagent should be running");
4366 });
4367
4368 parent.update(cx, |thread, cx| {
4369 thread.cancel(cx).detach();
4370 });
4371
4372 subagent.read_with(cx, |thread, _| {
4373 assert!(
4374 thread.is_turn_complete(),
4375 "subagent should be cancelled when parent cancels"
4376 );
4377 });
4378}
4379
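// A non-retryable model error (here, a missing API key) should end the subagent's turn
// rather than leaving it running.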
4380#[gpui::test]
4381async fn test_subagent_model_error_returned_as_tool_error(cx: &mut TestAppContext) {
4382 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
4383 let fake_model = model.as_fake();
4384
4385 cx.update(|cx| {
4386 cx.update_flags(true, vec!["subagents".to_string()]);
4387 });
4388
4389 let subagent_context = SubagentContext {
4390 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4391 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4392 depth: 1,
4393 summary_prompt: "Summarize".to_string(),
4394 context_low_prompt: "Context low".to_string(),
4395 };
4396
4397 let project = thread.read_with(cx, |t, _| t.project.clone());
4398 let project_context = cx.new(|_cx| ProjectContext::default());
4399 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4400 let context_server_registry =
4401 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4402
4403 let subagent = cx.new(|cx| {
4404 Thread::new_subagent(
4405 project.clone(),
4406 project_context,
4407 context_server_registry,
4408 Templates::new(),
4409 model.clone(),
4410 subagent_context,
4411 cx,
4412 )
4413 });
4414
4415 subagent
4416 .update(cx, |thread, cx| thread.submit_user_message("Do work", cx))
4417 .unwrap();
4418 cx.run_until_parked();
4419
4420 subagent.read_with(cx, |thread, _| {
4421 assert!(!thread.is_turn_complete(), "turn should be in progress");
4422 });
4423
4424 fake_model.send_last_completion_stream_error(LanguageModelCompletionError::NoApiKey {
4425 provider: LanguageModelProviderName::from("Fake".to_string()),
4426 });
4427 fake_model.end_last_completion_stream();
4428 cx.run_until_parked();
4429
4430 subagent.read_with(cx, |thread, _| {
4431 assert!(
4432 thread.is_turn_complete(),
4433 "turn should be complete after non-retryable error"
4434 );
4435 });
4436}
4437
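// Exercises the early-interruption path (e.g. on timeout): interrupt_for_summary
// should send the configured context_low_prompt to the model as the last user message.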
4438#[gpui::test]
4439async fn test_subagent_timeout_triggers_early_summary(cx: &mut TestAppContext) {
4440 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
4441 let fake_model = model.as_fake();
4442
4443 cx.update(|cx| {
4444 cx.update_flags(true, vec!["subagents".to_string()]);
4445 });
4446
4447 let subagent_context = SubagentContext {
4448 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4449 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4450 depth: 1,
4451 summary_prompt: "Summarize your work".to_string(),
4452 context_low_prompt: "Context low, stop and summarize".to_string(),
4453 };
4454
4455 let project = thread.read_with(cx, |t, _| t.project.clone());
4456 let project_context = cx.new(|_cx| ProjectContext::default());
4457 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4458 let context_server_registry =
4459 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4460
4461 let subagent = cx.new(|cx| {
4462 Thread::new_subagent(
4463 project.clone(),
4464 project_context.clone(),
4465 context_server_registry.clone(),
4466 Templates::new(),
4467 model.clone(),
4468 subagent_context.clone(),
4469 cx,
4470 )
4471 });
4472
4473 subagent.update(cx, |thread, _| {
4474 thread.add_tool(EchoTool);
4475 });
4476
4477 subagent
4478 .update(cx, |thread, cx| {
4479 thread.submit_user_message("Do some work", cx)
4480 })
4481 .unwrap();
4482 cx.run_until_parked();
4483
4484 fake_model.send_last_completion_stream_text_chunk("Working on it...");
4485 fake_model
4486 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
4487 fake_model.end_last_completion_stream();
4488 cx.run_until_parked();
4489
4490 let interrupt_result = subagent.update(cx, |thread, cx| thread.interrupt_for_summary(cx));
4491 assert!(
4492 interrupt_result.is_ok(),
4493 "interrupt_for_summary should succeed"
4494 );
4495
4496 cx.run_until_parked();
4497
4498 let pending = fake_model.pending_completions();
4499 assert!(
4500 !pending.is_empty(),
4501 "should have pending completion for interrupted summary"
4502 );
4503
4504 let messages = &pending.last().unwrap().messages;
4505 let user_messages: Vec<_> = messages
4506 .iter()
4507 .filter(|m| m.role == language_model::Role::User)
4508 .collect();
4509
4510 let last_user = user_messages.last().unwrap();
4511 let content_str = last_user.content[0].to_str().unwrap();
4512 assert!(
4513 content_str.contains("Context low") || content_str.contains("stop and summarize"),
4514 "context_low_prompt should be sent when interrupting: got {:?}",
4515 content_str
4516 );
4517}
4518
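// After a UsageUpdate at 80% of the model's max token count, latest_token_usage should
// report a remaining ratio at or below 25%, i.e. the context is considered low.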
4519#[gpui::test]
4520async fn test_context_low_check_returns_true_when_usage_high(cx: &mut TestAppContext) {
4521 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
4522 let fake_model = model.as_fake();
4523
4524 cx.update(|cx| {
4525 cx.update_flags(true, vec!["subagents".to_string()]);
4526 });
4527
4528 let subagent_context = SubagentContext {
4529 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4530 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4531 depth: 1,
4532 summary_prompt: "Summarize".to_string(),
4533 context_low_prompt: "Context low".to_string(),
4534 };
4535
4536 let project = thread.read_with(cx, |t, _| t.project.clone());
4537 let project_context = cx.new(|_cx| ProjectContext::default());
4538 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4539 let context_server_registry =
4540 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4541
4542 let subagent = cx.new(|cx| {
4543 Thread::new_subagent(
4544 project.clone(),
4545 project_context,
4546 context_server_registry,
4547 Templates::new(),
4548 model.clone(),
4549 subagent_context,
4550 cx,
4551 )
4552 });
4553
4554 subagent
4555 .update(cx, |thread, cx| thread.submit_user_message("Do work", cx))
4556 .unwrap();
4557 cx.run_until_parked();
4558
4559 let max_tokens = model.max_token_count();
4560 let high_usage = language_model::TokenUsage {
4561 input_tokens: (max_tokens as f64 * 0.80) as u64,
4562 output_tokens: 0,
4563 cache_creation_input_tokens: 0,
4564 cache_read_input_tokens: 0,
4565 };
4566
4567 fake_model
4568 .send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(high_usage));
4569 fake_model.send_last_completion_stream_text_chunk("Working...");
4570 fake_model
4571 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
4572 fake_model.end_last_completion_stream();
4573 cx.run_until_parked();
4574
4575 let usage = subagent.read_with(cx, |thread, _| thread.latest_token_usage());
4576 assert!(usage.is_some(), "should have token usage after completion");
4577
4578 let usage = usage.unwrap();
4579 let remaining_ratio = 1.0 - (usage.used_tokens as f32 / usage.max_tokens as f32);
4580 assert!(
4581 remaining_ratio <= 0.25,
4582 "remaining ratio should be at or below 25% (got {}%), indicating context is low",
4583 remaining_ratio * 100.0
4584 );
4585}
4586
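// SubagentTool::validate_allowed_tools should reject tool names the parent thread does
// not provide, with an error that names the offending tool.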
4587#[gpui::test]
4588async fn test_allowed_tools_rejects_unknown_tool(cx: &mut TestAppContext) {
4589 init_test(cx);
4590
4591 cx.update(|cx| {
4592 cx.update_flags(true, vec!["subagents".to_string()]);
4593 });
4594
4595 let fs = FakeFs::new(cx.executor());
4596 fs.insert_tree(path!("/test"), json!({})).await;
4597 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
4598 let project_context = cx.new(|_cx| ProjectContext::default());
4599 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4600 let context_server_registry =
4601 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4602 let model = Arc::new(FakeLanguageModel::default());
4603
4604 let parent = cx.new(|cx| {
4605 let mut thread = Thread::new(
4606 project.clone(),
4607 project_context.clone(),
4608 context_server_registry.clone(),
4609 Templates::new(),
4610 Some(model.clone()),
4611 cx,
4612 );
4613 thread.add_tool(EchoTool);
4614 thread
4615 });
4616
4617 let parent_tool_names: Vec<gpui::SharedString> = vec!["echo".into()];
4618
4619 let tool = Arc::new(SubagentTool::new(
4620 parent.downgrade(),
4621 project,
4622 project_context,
4623 context_server_registry,
4624 Templates::new(),
4625 0,
4626 parent_tool_names,
4627 ));
4628
4629 let result = tool.validate_allowed_tools(&Some(vec!["nonexistent_tool".to_string()]));
4630 assert!(result.is_err(), "should reject unknown tool");
4631 let err_msg = result.unwrap_err().to_string();
4632 assert!(
4633 err_msg.contains("nonexistent_tool"),
4634 "error should mention the invalid tool name: {}",
4635 err_msg
4636 );
4637 assert!(
4638 err_msg.contains("not available"),
4639 "error should explain the tool is not available: {}",
4640 err_msg
4641 );
4642}
4643
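// A turn that ends without any content from the model should still complete cleanly.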
4644#[gpui::test]
4645async fn test_subagent_empty_response_handled(cx: &mut TestAppContext) {
4646 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
4647 let fake_model = model.as_fake();
4648
4649 cx.update(|cx| {
4650 cx.update_flags(true, vec!["subagents".to_string()]);
4651 });
4652
4653 let subagent_context = SubagentContext {
4654 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4655 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4656 depth: 1,
4657 summary_prompt: "Summarize".to_string(),
4658 context_low_prompt: "Context low".to_string(),
4659 };
4660
4661 let project = thread.read_with(cx, |t, _| t.project.clone());
4662 let project_context = cx.new(|_cx| ProjectContext::default());
4663 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4664 let context_server_registry =
4665 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4666
4667 let subagent = cx.new(|cx| {
4668 Thread::new_subagent(
4669 project.clone(),
4670 project_context,
4671 context_server_registry,
4672 Templates::new(),
4673 model.clone(),
4674 subagent_context,
4675 cx,
4676 )
4677 });
4678
4679 subagent
4680 .update(cx, |thread, cx| thread.submit_user_message("Do work", cx))
4681 .unwrap();
4682 cx.run_until_parked();
4683
4684 fake_model
4685 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
4686 fake_model.end_last_completion_stream();
4687 cx.run_until_parked();
4688
4689 subagent.read_with(cx, |thread, _| {
4690 assert!(
4691 thread.is_turn_complete(),
4692 "turn should complete even with empty response"
4693 );
4694 });
4695}
4696
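// A nested subagent at depth 2 should behave like any other subagent: it reports its
// depth and can submit messages that reach the model.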
4697#[gpui::test]
4698async fn test_nested_subagent_at_depth_2_succeeds(cx: &mut TestAppContext) {
4699 init_test(cx);
4700
4701 cx.update(|cx| {
4702 cx.update_flags(true, vec!["subagents".to_string()]);
4703 });
4704
4705 let fs = FakeFs::new(cx.executor());
4706 fs.insert_tree(path!("/test"), json!({})).await;
4707 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
4708 let project_context = cx.new(|_cx| ProjectContext::default());
4709 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4710 let context_server_registry =
4711 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4712 let model = Arc::new(FakeLanguageModel::default());
4713
4714 let depth_1_context = SubagentContext {
4715 parent_thread_id: agent_client_protocol::SessionId::new("root-id"),
4716 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-1"),
4717 depth: 1,
4718 summary_prompt: "Summarize".to_string(),
4719 context_low_prompt: "Context low".to_string(),
4720 };
4721
4722 let depth_1_subagent = cx.new(|cx| {
4723 Thread::new_subagent(
4724 project.clone(),
4725 project_context.clone(),
4726 context_server_registry.clone(),
4727 Templates::new(),
4728 model.clone(),
4729 depth_1_context,
4730 cx,
4731 )
4732 });
4733
4734 depth_1_subagent.read_with(cx, |thread, _| {
4735 assert_eq!(thread.depth(), 1);
4736 assert!(thread.is_subagent());
4737 });
4738
4739 let depth_2_context = SubagentContext {
4740 parent_thread_id: agent_client_protocol::SessionId::new("depth-1-id"),
4741 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-2"),
4742 depth: 2,
4743 summary_prompt: "Summarize depth 2".to_string(),
4744 context_low_prompt: "Context low depth 2".to_string(),
4745 };
4746
4747 let depth_2_subagent = cx.new(|cx| {
4748 Thread::new_subagent(
4749 project.clone(),
4750 project_context.clone(),
4751 context_server_registry.clone(),
4752 Templates::new(),
4753 model.clone(),
4754 depth_2_context,
4755 cx,
4756 )
4757 });
4758
4759 depth_2_subagent.read_with(cx, |thread, _| {
4760 assert_eq!(thread.depth(), 2);
4761 assert!(thread.is_subagent());
4762 });
4763
4764 depth_2_subagent
4765 .update(cx, |thread, cx| {
4766 thread.submit_user_message("Nested task", cx)
4767 })
4768 .unwrap();
4769 cx.run_until_parked();
4770
4771 let pending = model.as_fake().pending_completions();
4772 assert!(
4773 !pending.is_empty(),
4774 "depth-2 subagent should be able to submit messages"
4775 );
4776}
4777
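// A subagent with the echo tool registered should be able to run it, and the tool
// result must appear in the follow-up request sent back to the model.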
4778#[gpui::test]
4779async fn test_subagent_uses_tool_and_returns_result(cx: &mut TestAppContext) {
4780 init_test(cx);
4781 always_allow_tools(cx);
4782
4783 cx.update(|cx| {
4784 cx.update_flags(true, vec!["subagents".to_string()]);
4785 });
4786
4787 let fs = FakeFs::new(cx.executor());
4788 fs.insert_tree(path!("/test"), json!({})).await;
4789 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
4790 let project_context = cx.new(|_cx| ProjectContext::default());
4791 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4792 let context_server_registry =
4793 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4794 let model = Arc::new(FakeLanguageModel::default());
4795 let fake_model = model.as_fake();
4796
4797 let subagent_context = SubagentContext {
4798 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4799 tool_use_id: language_model::LanguageModelToolUseId::from("tool-use-id"),
4800 depth: 1,
4801 summary_prompt: "Summarize what you did".to_string(),
4802 context_low_prompt: "Context low".to_string(),
4803 };
4804
4805 let subagent = cx.new(|cx| {
4806 let mut thread = Thread::new_subagent(
4807 project.clone(),
4808 project_context,
4809 context_server_registry,
4810 Templates::new(),
4811 model.clone(),
4812 subagent_context,
4813 cx,
4814 );
4815 thread.add_tool(EchoTool);
4816 thread
4817 });
4818
4819 subagent.read_with(cx, |thread, _| {
4820 assert!(
4821 thread.has_registered_tool("echo"),
4822 "subagent should have echo tool"
4823 );
4824 });
4825
4826 subagent
4827 .update(cx, |thread, cx| {
4828 thread.submit_user_message("Use the echo tool to echo 'hello world'", cx)
4829 })
4830 .unwrap();
4831 cx.run_until_parked();
4832
4833 let tool_use = LanguageModelToolUse {
4834 id: "tool_call_1".into(),
4835 name: EchoTool::name().into(),
4836 raw_input: json!({"text": "hello world"}).to_string(),
4837 input: json!({"text": "hello world"}),
4838 is_input_complete: true,
4839 thought_signature: None,
4840 };
4841 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use));
4842 fake_model.end_last_completion_stream();
4843 cx.run_until_parked();
4844
4845 let pending = fake_model.pending_completions();
4846 assert!(
4847 !pending.is_empty(),
4848 "should have pending completion after tool use"
4849 );
4850
4851 let last_completion = pending.last().unwrap();
4852 let has_tool_result = last_completion.messages.iter().any(|m| {
4853 m.content
4854 .iter()
4855 .any(|c| matches!(c, MessageContent::ToolResult(_)))
4856 });
4857 assert!(
4858 has_tool_result,
4859 "tool result should be in the messages sent back to the model"
4860 );
4861}
4862
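// Once MAX_PARALLEL_SUBAGENTS subagents are registered on the parent, running the
// subagent tool again should fail with a "Maximum parallel subagents" error.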
4863#[gpui::test]
4864async fn test_max_parallel_subagents_enforced(cx: &mut TestAppContext) {
4865 init_test(cx);
4866
4867 cx.update(|cx| {
4868 cx.update_flags(true, vec!["subagents".to_string()]);
4869 });
4870
4871 let fs = FakeFs::new(cx.executor());
4872 fs.insert_tree(path!("/test"), json!({})).await;
4873 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
4874 let project_context = cx.new(|_cx| ProjectContext::default());
4875 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4876 let context_server_registry =
4877 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4878 let model = Arc::new(FakeLanguageModel::default());
4879
4880 let parent = cx.new(|cx| {
4881 Thread::new(
4882 project.clone(),
4883 project_context.clone(),
4884 context_server_registry.clone(),
4885 Templates::new(),
4886 Some(model.clone()),
4887 cx,
4888 )
4889 });
4890
4891 let mut subagents = Vec::new();
4892 for i in 0..MAX_PARALLEL_SUBAGENTS {
4893 let subagent_context = SubagentContext {
4894 parent_thread_id: agent_client_protocol::SessionId::new("parent-id"),
4895 tool_use_id: language_model::LanguageModelToolUseId::from(format!("tool-use-{}", i)),
4896 depth: 1,
4897 summary_prompt: "Summarize".to_string(),
4898 context_low_prompt: "Context low".to_string(),
4899 };
4900
4901 let subagent = cx.new(|cx| {
4902 Thread::new_subagent(
4903 project.clone(),
4904 project_context.clone(),
4905 context_server_registry.clone(),
4906 Templates::new(),
4907 model.clone(),
4908 subagent_context,
4909 cx,
4910 )
4911 });
4912
4913 parent.update(cx, |thread, _cx| {
4914 thread.register_running_subagent(subagent.downgrade());
4915 });
4916 subagents.push(subagent);
4917 }
4918
4919 parent.read_with(cx, |thread, _| {
4920 assert_eq!(
4921 thread.running_subagent_count(),
4922 MAX_PARALLEL_SUBAGENTS,
4923 "should have MAX_PARALLEL_SUBAGENTS registered"
4924 );
4925 });
4926
4927 let parent_tool_names: Vec<gpui::SharedString> = vec![];
4928
4929 let tool = Arc::new(SubagentTool::new(
4930 parent.downgrade(),
4931 project.clone(),
4932 project_context,
4933 context_server_registry,
4934 Templates::new(),
4935 0,
4936 parent_tool_names,
4937 ));
4938
4939 let (event_stream, _rx) = crate::ToolCallEventStream::test();
4940
4941 let result = cx.update(|cx| {
4942 tool.run(
4943 SubagentToolInput {
4944 label: "Test".to_string(),
4945 task_prompt: "Do something".to_string(),
4946 summary_prompt: "Summarize".to_string(),
4947 context_low_prompt: "Context low".to_string(),
4948 timeout_ms: None,
4949 allowed_tools: None,
4950 },
4951 event_stream,
4952 cx,
4953 )
4954 });
4955
4956 let err = result.await.unwrap_err();
4957 assert!(
4958 err.to_string().contains("Maximum parallel subagents"),
4959 "should reject when max parallel subagents reached: {}",
4960 err
4961 );
4962
4963 drop(subagents);
4964}
4965
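// Full subagent tool flow: running the tool sends the task prompt, streams the task
// response, follows up with the summary prompt, and resolves with the summary text.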
4966#[gpui::test]
4967async fn test_subagent_tool_end_to_end(cx: &mut TestAppContext) {
4968 init_test(cx);
4969 always_allow_tools(cx);
4970
4971 cx.update(|cx| {
4972 cx.update_flags(true, vec!["subagents".to_string()]);
4973 });
4974
4975 let fs = FakeFs::new(cx.executor());
4976 fs.insert_tree(path!("/test"), json!({})).await;
4977 let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
4978 let project_context = cx.new(|_cx| ProjectContext::default());
4979 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
4980 let context_server_registry =
4981 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
4982 let model = Arc::new(FakeLanguageModel::default());
4983 let fake_model = model.as_fake();
4984
4985 let parent = cx.new(|cx| {
4986 let mut thread = Thread::new(
4987 project.clone(),
4988 project_context.clone(),
4989 context_server_registry.clone(),
4990 Templates::new(),
4991 Some(model.clone()),
4992 cx,
4993 );
4994 thread.add_tool(EchoTool);
4995 thread
4996 });
4997
4998 let parent_tool_names: Vec<gpui::SharedString> = vec!["echo".into()];
4999
5000 let tool = Arc::new(SubagentTool::new(
5001 parent.downgrade(),
5002 project.clone(),
5003 project_context,
5004 context_server_registry,
5005 Templates::new(),
5006 0,
5007 parent_tool_names,
5008 ));
5009
5010 let (event_stream, _rx) = crate::ToolCallEventStream::test();
5011
5012 let task = cx.update(|cx| {
5013 tool.run(
5014 SubagentToolInput {
5015 label: "Research task".to_string(),
5016 task_prompt: "Find all TODOs in the codebase".to_string(),
5017 summary_prompt: "Summarize what you found".to_string(),
5018 context_low_prompt: "Context low, wrap up".to_string(),
5019 timeout_ms: None,
5020 allowed_tools: None,
5021 },
5022 event_stream,
5023 cx,
5024 )
5025 });
5026
5027 cx.run_until_parked();
5028
5029 let pending = fake_model.pending_completions();
5030 assert!(
5031 !pending.is_empty(),
5032 "subagent should have started and sent a completion request"
5033 );
5034
5035 let first_completion = &pending[0];
5036 let has_task_prompt = first_completion.messages.iter().any(|m| {
5037 m.role == language_model::Role::User
5038 && m.content
5039 .iter()
5040 .any(|c| c.to_str().map(|s| s.contains("TODO")).unwrap_or(false))
5041 });
5042 assert!(has_task_prompt, "task prompt should be sent to subagent");
5043
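    // Stream the subagent's task response and end the turn; this should trigger the
    // follow-up summary request asserted below.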
5044 fake_model.send_last_completion_stream_text_chunk("I found 5 TODOs in the codebase.");
5045 fake_model
5046 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
5047 fake_model.end_last_completion_stream();
5048 cx.run_until_parked();
5049
5050 let pending = fake_model.pending_completions();
5051 assert!(
5052 !pending.is_empty(),
5053 "should have pending completion for summary request"
5054 );
5055
5056 let last_completion = pending.last().unwrap();
5057 let has_summary_prompt = last_completion.messages.iter().any(|m| {
5058 m.role == language_model::Role::User
5059 && m.content.iter().any(|c| {
5060 c.to_str()
5061 .map(|s| s.contains("Summarize") || s.contains("summarize"))
5062 .unwrap_or(false)
5063 })
5064 });
5065 assert!(
5066 has_summary_prompt,
5067 "summary prompt should be sent after task completion"
5068 );
5069
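    // Stream the summary response; the tool task should resolve with this text.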
5070 fake_model.send_last_completion_stream_text_chunk("Summary: Found 5 TODOs across 3 files.");
5071 fake_model
5072 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
5073 fake_model.end_last_completion_stream();
5074 cx.run_until_parked();
5075
5076 let result = task.await;
5077 assert!(result.is_ok(), "subagent tool should complete successfully");
5078
5079 let summary = result.unwrap();
5080 assert!(
5081 summary.contains("Summary") || summary.contains("TODO") || summary.contains("5"),
5082 "summary should contain subagent's response: {}",
5083 summary
5084 );
5085}
5086
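// The remaining tests exercise per-path (or per-query/URL) permission rules for the
// edit_file, delete_path, move_path, copy_path, save_file, web_search, and fetch
// tools: always_deny patterns block the operation, while always_allow patterns skip
// the confirmation step.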
5087#[gpui::test]
5088async fn test_edit_file_tool_deny_rule_blocks_edit(cx: &mut TestAppContext) {
5089 init_test(cx);
5090
5091 let fs = FakeFs::new(cx.executor());
5092 fs.insert_tree("/root", json!({"sensitive_config.txt": "secret data"}))
5093 .await;
5094 let project = Project::test(fs.clone(), ["/root".as_ref()], cx).await;
5095
5096 cx.update(|cx| {
5097 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5098 settings.tool_permissions.tools.insert(
5099 "edit_file".into(),
5100 agent_settings::ToolRules {
5101 default_mode: settings::ToolPermissionMode::Allow,
5102 always_allow: vec![],
5103 always_deny: vec![agent_settings::CompiledRegex::new(r"sensitive", false).unwrap()],
5104 always_confirm: vec![],
5105 invalid_patterns: vec![],
5106 },
5107 );
5108 agent_settings::AgentSettings::override_global(settings, cx);
5109 });
5110
5111 let context_server_registry =
5112 cx.new(|cx| crate::ContextServerRegistry::new(project.read(cx).context_server_store(), cx));
5113 let language_registry = project.read_with(cx, |project, _cx| project.languages().clone());
5114 let templates = crate::Templates::new();
5115 let thread = cx.new(|cx| {
5116 crate::Thread::new(
5117 project.clone(),
5118 cx.new(|_cx| prompt_store::ProjectContext::default()),
5119 context_server_registry,
5120 templates.clone(),
5121 None,
5122 cx,
5123 )
5124 });
5125
5126 #[allow(clippy::arc_with_non_send_sync)]
5127 let tool = Arc::new(crate::EditFileTool::new(
5128 project.clone(),
5129 thread.downgrade(),
5130 language_registry,
5131 templates,
5132 ));
5133 let (event_stream, _rx) = crate::ToolCallEventStream::test();
5134
5135 let task = cx.update(|cx| {
5136 tool.run(
5137 crate::EditFileToolInput {
5138 display_description: "Edit sensitive file".to_string(),
5139 path: "root/sensitive_config.txt".into(),
5140 mode: crate::EditFileMode::Edit,
5141 },
5142 event_stream,
5143 cx,
5144 )
5145 });
5146
5147 let result = task.await;
5148 assert!(result.is_err(), "expected edit to be blocked");
5149 assert!(
5150 result.unwrap_err().to_string().contains("blocked"),
5151 "error should mention the edit was blocked"
5152 );
5153}
5154
5155#[gpui::test]
5156async fn test_delete_path_tool_deny_rule_blocks_deletion(cx: &mut TestAppContext) {
5157 init_test(cx);
5158
5159 let fs = FakeFs::new(cx.executor());
5160 fs.insert_tree("/root", json!({"important_data.txt": "critical info"}))
5161 .await;
5162 let project = Project::test(fs.clone(), ["/root".as_ref()], cx).await;
5163
5164 cx.update(|cx| {
5165 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5166 settings.tool_permissions.tools.insert(
5167 "delete_path".into(),
5168 agent_settings::ToolRules {
5169 default_mode: settings::ToolPermissionMode::Allow,
5170 always_allow: vec![],
5171 always_deny: vec![agent_settings::CompiledRegex::new(r"important", false).unwrap()],
5172 always_confirm: vec![],
5173 invalid_patterns: vec![],
5174 },
5175 );
5176 agent_settings::AgentSettings::override_global(settings, cx);
5177 });
5178
5179 let action_log = cx.new(|_cx| action_log::ActionLog::new(project.clone()));
5180
5181 #[allow(clippy::arc_with_non_send_sync)]
5182 let tool = Arc::new(crate::DeletePathTool::new(project, action_log));
5183 let (event_stream, _rx) = crate::ToolCallEventStream::test();
5184
5185 let task = cx.update(|cx| {
5186 tool.run(
5187 crate::DeletePathToolInput {
5188 path: "root/important_data.txt".to_string(),
5189 },
5190 event_stream,
5191 cx,
5192 )
5193 });
5194
5195 let result = task.await;
5196 assert!(result.is_err(), "expected deletion to be blocked");
5197 assert!(
5198 result.unwrap_err().to_string().contains("blocked"),
5199 "error should mention the deletion was blocked"
5200 );
5201}
5202
5203#[gpui::test]
5204async fn test_move_path_tool_denies_if_destination_denied(cx: &mut TestAppContext) {
5205 init_test(cx);
5206
5207 let fs = FakeFs::new(cx.executor());
5208 fs.insert_tree(
5209 "/root",
5210 json!({
5211 "safe.txt": "content",
5212 "protected": {}
5213 }),
5214 )
5215 .await;
5216 let project = Project::test(fs.clone(), ["/root".as_ref()], cx).await;
5217
5218 cx.update(|cx| {
5219 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5220 settings.tool_permissions.tools.insert(
5221 "move_path".into(),
5222 agent_settings::ToolRules {
5223 default_mode: settings::ToolPermissionMode::Allow,
5224 always_allow: vec![],
5225 always_deny: vec![agent_settings::CompiledRegex::new(r"protected", false).unwrap()],
5226 always_confirm: vec![],
5227 invalid_patterns: vec![],
5228 },
5229 );
5230 agent_settings::AgentSettings::override_global(settings, cx);
5231 });
5232
5233 #[allow(clippy::arc_with_non_send_sync)]
5234 let tool = Arc::new(crate::MovePathTool::new(project));
5235 let (event_stream, _rx) = crate::ToolCallEventStream::test();
5236
5237 let task = cx.update(|cx| {
5238 tool.run(
5239 crate::MovePathToolInput {
5240 source_path: "root/safe.txt".to_string(),
5241 destination_path: "root/protected/safe.txt".to_string(),
5242 },
5243 event_stream,
5244 cx,
5245 )
5246 });
5247
5248 let result = task.await;
5249 assert!(
5250 result.is_err(),
5251 "expected move to be blocked due to destination path"
5252 );
5253 assert!(
5254 result.unwrap_err().to_string().contains("blocked"),
5255 "error should mention the move was blocked"
5256 );
5257}
5258
5259#[gpui::test]
5260async fn test_move_path_tool_denies_if_source_denied(cx: &mut TestAppContext) {
5261 init_test(cx);
5262
5263 let fs = FakeFs::new(cx.executor());
5264 fs.insert_tree(
5265 "/root",
5266 json!({
5267 "secret.txt": "secret content",
5268 "public": {}
5269 }),
5270 )
5271 .await;
5272 let project = Project::test(fs.clone(), ["/root".as_ref()], cx).await;
5273
5274 cx.update(|cx| {
5275 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5276 settings.tool_permissions.tools.insert(
5277 "move_path".into(),
5278 agent_settings::ToolRules {
5279 default_mode: settings::ToolPermissionMode::Allow,
5280 always_allow: vec![],
5281 always_deny: vec![agent_settings::CompiledRegex::new(r"secret", false).unwrap()],
5282 always_confirm: vec![],
5283 invalid_patterns: vec![],
5284 },
5285 );
5286 agent_settings::AgentSettings::override_global(settings, cx);
5287 });
5288
5289 #[allow(clippy::arc_with_non_send_sync)]
5290 let tool = Arc::new(crate::MovePathTool::new(project));
5291 let (event_stream, _rx) = crate::ToolCallEventStream::test();
5292
5293 let task = cx.update(|cx| {
5294 tool.run(
5295 crate::MovePathToolInput {
5296 source_path: "root/secret.txt".to_string(),
5297 destination_path: "root/public/not_secret.txt".to_string(),
5298 },
5299 event_stream,
5300 cx,
5301 )
5302 });
5303
5304 let result = task.await;
5305 assert!(
5306 result.is_err(),
5307 "expected move to be blocked due to source path"
5308 );
5309 assert!(
5310 result.unwrap_err().to_string().contains("blocked"),
5311 "error should mention the move was blocked"
5312 );
5313}
5314
5315#[gpui::test]
5316async fn test_copy_path_tool_deny_rule_blocks_copy(cx: &mut TestAppContext) {
5317 init_test(cx);
5318
5319 let fs = FakeFs::new(cx.executor());
5320 fs.insert_tree(
5321 "/root",
5322 json!({
5323 "confidential.txt": "confidential data",
5324 "dest": {}
5325 }),
5326 )
5327 .await;
5328 let project = Project::test(fs.clone(), ["/root".as_ref()], cx).await;
5329
5330 cx.update(|cx| {
5331 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5332 settings.tool_permissions.tools.insert(
5333 "copy_path".into(),
5334 agent_settings::ToolRules {
5335 default_mode: settings::ToolPermissionMode::Allow,
5336 always_allow: vec![],
5337 always_deny: vec![
5338 agent_settings::CompiledRegex::new(r"confidential", false).unwrap(),
5339 ],
5340 always_confirm: vec![],
5341 invalid_patterns: vec![],
5342 },
5343 );
5344 agent_settings::AgentSettings::override_global(settings, cx);
5345 });
5346
5347 #[allow(clippy::arc_with_non_send_sync)]
5348 let tool = Arc::new(crate::CopyPathTool::new(project));
5349 let (event_stream, _rx) = crate::ToolCallEventStream::test();
5350
5351 let task = cx.update(|cx| {
5352 tool.run(
5353 crate::CopyPathToolInput {
5354 source_path: "root/confidential.txt".to_string(),
5355 destination_path: "root/dest/copy.txt".to_string(),
5356 },
5357 event_stream,
5358 cx,
5359 )
5360 });
5361
5362 let result = task.await;
5363 assert!(result.is_err(), "expected copy to be blocked");
5364 assert!(
5365 result.unwrap_err().to_string().contains("blocked"),
5366 "error should mention the copy was blocked"
5367 );
5368}
5369
5370#[gpui::test]
5371async fn test_save_file_tool_denies_if_any_path_denied(cx: &mut TestAppContext) {
5372 init_test(cx);
5373
5374 let fs = FakeFs::new(cx.executor());
5375 fs.insert_tree(
5376 "/root",
5377 json!({
5378 "normal.txt": "normal content",
5379 "readonly": {
5380 "config.txt": "readonly content"
5381 }
5382 }),
5383 )
5384 .await;
5385 let project = Project::test(fs.clone(), ["/root".as_ref()], cx).await;
5386
5387 cx.update(|cx| {
5388 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5389 settings.tool_permissions.tools.insert(
5390 "save_file".into(),
5391 agent_settings::ToolRules {
5392 default_mode: settings::ToolPermissionMode::Allow,
5393 always_allow: vec![],
5394 always_deny: vec![agent_settings::CompiledRegex::new(r"readonly", false).unwrap()],
5395 always_confirm: vec![],
5396 invalid_patterns: vec![],
5397 },
5398 );
5399 agent_settings::AgentSettings::override_global(settings, cx);
5400 });
5401
5402 #[allow(clippy::arc_with_non_send_sync)]
5403 let tool = Arc::new(crate::SaveFileTool::new(project));
5404 let (event_stream, _rx) = crate::ToolCallEventStream::test();
5405
5406 let task = cx.update(|cx| {
5407 tool.run(
5408 crate::SaveFileToolInput {
5409 paths: vec![
5410 std::path::PathBuf::from("root/normal.txt"),
5411 std::path::PathBuf::from("root/readonly/config.txt"),
5412 ],
5413 },
5414 event_stream,
5415 cx,
5416 )
5417 });
5418
5419 let result = task.await;
5420 assert!(
5421 result.is_err(),
5422 "expected save to be blocked due to denied path"
5423 );
5424 assert!(
5425 result.unwrap_err().to_string().contains("blocked"),
5426 "error should mention the save was blocked"
5427 );
5428}
5429
5430#[gpui::test]
5431async fn test_save_file_tool_respects_deny_rules(cx: &mut TestAppContext) {
5432 init_test(cx);
5433
5434 let fs = FakeFs::new(cx.executor());
5435 fs.insert_tree("/root", json!({"config.secret": "secret config"}))
5436 .await;
5437 let project = Project::test(fs.clone(), ["/root".as_ref()], cx).await;
5438
5439 cx.update(|cx| {
5440 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5441 settings.always_allow_tool_actions = false;
5442 settings.tool_permissions.tools.insert(
5443 "save_file".into(),
5444 agent_settings::ToolRules {
5445 default_mode: settings::ToolPermissionMode::Allow,
5446 always_allow: vec![],
5447 always_deny: vec![agent_settings::CompiledRegex::new(r"\.secret$", false).unwrap()],
5448 always_confirm: vec![],
5449 invalid_patterns: vec![],
5450 },
5451 );
5452 agent_settings::AgentSettings::override_global(settings, cx);
5453 });
5454
5455 #[allow(clippy::arc_with_non_send_sync)]
5456 let tool = Arc::new(crate::SaveFileTool::new(project));
5457 let (event_stream, _rx) = crate::ToolCallEventStream::test();
5458
5459 let task = cx.update(|cx| {
5460 tool.run(
5461 crate::SaveFileToolInput {
5462 paths: vec![std::path::PathBuf::from("root/config.secret")],
5463 },
5464 event_stream,
5465 cx,
5466 )
5467 });
5468
5469 let result = task.await;
5470 assert!(result.is_err(), "expected save to be blocked");
5471 assert!(
5472 result.unwrap_err().to_string().contains("blocked"),
5473 "error should mention the save was blocked"
5474 );
5475}
5476
5477#[gpui::test]
5478async fn test_web_search_tool_deny_rule_blocks_search(cx: &mut TestAppContext) {
5479 init_test(cx);
5480
5481 cx.update(|cx| {
5482 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5483 settings.tool_permissions.tools.insert(
5484 "web_search".into(),
5485 agent_settings::ToolRules {
5486 default_mode: settings::ToolPermissionMode::Allow,
5487 always_allow: vec![],
5488 always_deny: vec![
5489 agent_settings::CompiledRegex::new(r"internal\.company", false).unwrap(),
5490 ],
5491 always_confirm: vec![],
5492 invalid_patterns: vec![],
5493 },
5494 );
5495 agent_settings::AgentSettings::override_global(settings, cx);
5496 });
5497
5498 #[allow(clippy::arc_with_non_send_sync)]
5499 let tool = Arc::new(crate::WebSearchTool);
5500 let (event_stream, _rx) = crate::ToolCallEventStream::test();
5501
5502 let input: crate::WebSearchToolInput =
5503 serde_json::from_value(json!({"query": "internal.company.com secrets"})).unwrap();
5504
5505 let task = cx.update(|cx| tool.run(input, event_stream, cx));
5506
5507 let result = task.await;
5508 assert!(result.is_err(), "expected search to be blocked");
5509 assert!(
5510 result.unwrap_err().to_string().contains("blocked"),
5511 "error should mention the search was blocked"
5512 );
5513}
5514
5515#[gpui::test]
5516async fn test_edit_file_tool_allow_rule_skips_confirmation(cx: &mut TestAppContext) {
5517 init_test(cx);
5518
5519 let fs = FakeFs::new(cx.executor());
5520 fs.insert_tree("/root", json!({"README.md": "# Hello"}))
5521 .await;
5522 let project = Project::test(fs.clone(), ["/root".as_ref()], cx).await;
5523
5524 cx.update(|cx| {
5525 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5526 settings.always_allow_tool_actions = false;
5527 settings.tool_permissions.tools.insert(
5528 "edit_file".into(),
5529 agent_settings::ToolRules {
5530 default_mode: settings::ToolPermissionMode::Confirm,
5531 always_allow: vec![agent_settings::CompiledRegex::new(r"\.md$", false).unwrap()],
5532 always_deny: vec![],
5533 always_confirm: vec![],
5534 invalid_patterns: vec![],
5535 },
5536 );
5537 agent_settings::AgentSettings::override_global(settings, cx);
5538 });
5539
5540 let context_server_registry =
5541 cx.new(|cx| crate::ContextServerRegistry::new(project.read(cx).context_server_store(), cx));
5542 let language_registry = project.read_with(cx, |project, _cx| project.languages().clone());
5543 let templates = crate::Templates::new();
5544 let thread = cx.new(|cx| {
5545 crate::Thread::new(
5546 project.clone(),
5547 cx.new(|_cx| prompt_store::ProjectContext::default()),
5548 context_server_registry,
5549 templates.clone(),
5550 None,
5551 cx,
5552 )
5553 });
5554
5555 #[allow(clippy::arc_with_non_send_sync)]
5556 let tool = Arc::new(crate::EditFileTool::new(
5557 project,
5558 thread.downgrade(),
5559 language_registry,
5560 templates,
5561 ));
5562 let (event_stream, mut rx) = crate::ToolCallEventStream::test();
5563
5564 let _task = cx.update(|cx| {
5565 tool.run(
5566 crate::EditFileToolInput {
5567 display_description: "Edit README".to_string(),
5568 path: "root/README.md".into(),
5569 mode: crate::EditFileMode::Edit,
5570 },
5571 event_stream,
5572 cx,
5573 )
5574 });
5575
5576 cx.run_until_parked();
5577
5578 let event = rx.try_next();
5579 assert!(
5580 !matches!(event, Ok(Some(Ok(ThreadEvent::ToolCallAuthorization(_))))),
5581 "expected no authorization request for allowed .md file"
5582 );
5583}
5584
5585#[gpui::test]
5586async fn test_fetch_tool_deny_rule_blocks_url(cx: &mut TestAppContext) {
5587 init_test(cx);
5588
5589 cx.update(|cx| {
5590 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5591 settings.tool_permissions.tools.insert(
5592 "fetch".into(),
5593 agent_settings::ToolRules {
5594 default_mode: settings::ToolPermissionMode::Allow,
5595 always_allow: vec![],
5596 always_deny: vec![
5597 agent_settings::CompiledRegex::new(r"internal\.company\.com", false).unwrap(),
5598 ],
5599 always_confirm: vec![],
5600 invalid_patterns: vec![],
5601 },
5602 );
5603 agent_settings::AgentSettings::override_global(settings, cx);
5604 });
5605
5606 let http_client = gpui::http_client::FakeHttpClient::with_200_response();
5607
5608 #[allow(clippy::arc_with_non_send_sync)]
5609 let tool = Arc::new(crate::FetchTool::new(http_client));
5610 let (event_stream, _rx) = crate::ToolCallEventStream::test();
5611
5612 let input: crate::FetchToolInput =
5613 serde_json::from_value(json!({"url": "https://internal.company.com/api"})).unwrap();
5614
5615 let task = cx.update(|cx| tool.run(input, event_stream, cx));
5616
5617 let result = task.await;
5618 assert!(result.is_err(), "expected fetch to be blocked");
5619 assert!(
5620 result.unwrap_err().to_string().contains("blocked"),
5621 "error should mention the fetch was blocked"
5622 );
5623}
5624
5625#[gpui::test]
5626async fn test_fetch_tool_allow_rule_skips_confirmation(cx: &mut TestAppContext) {
5627 init_test(cx);
5628
5629 cx.update(|cx| {
5630 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
5631 settings.always_allow_tool_actions = false;
5632 settings.tool_permissions.tools.insert(
5633 "fetch".into(),
5634 agent_settings::ToolRules {
5635 default_mode: settings::ToolPermissionMode::Confirm,
5636 always_allow: vec![agent_settings::CompiledRegex::new(r"docs\.rs", false).unwrap()],
5637 always_deny: vec![],
5638 always_confirm: vec![],
5639 invalid_patterns: vec![],
5640 },
5641 );
5642 agent_settings::AgentSettings::override_global(settings, cx);
5643 });
5644
5645 let http_client = gpui::http_client::FakeHttpClient::with_200_response();
5646
5647 #[allow(clippy::arc_with_non_send_sync)]
5648 let tool = Arc::new(crate::FetchTool::new(http_client));
5649 let (event_stream, mut rx) = crate::ToolCallEventStream::test();
5650
5651 let input: crate::FetchToolInput =
5652 serde_json::from_value(json!({"url": "https://docs.rs/some-crate"})).unwrap();
5653
5654 let _task = cx.update(|cx| tool.run(input, event_stream, cx));
5655
5656 cx.run_until_parked();
5657
5658 let event = rx.try_next();
5659 assert!(
5660 !matches!(event, Ok(Some(Ok(ThreadEvent::ToolCallAuthorization(_))))),
5661 "expected no authorization request for allowed docs.rs URL"
5662 );
5663}