1use super::*;
2use acp_thread::{AgentConnection, AgentModelGroupName, AgentModelList, UserMessageId};
3use agent_client_protocol::{self as acp};
4use agent_settings::AgentProfileId;
5use anyhow::Result;
6use client::{Client, UserStore};
7use cloud_llm_client::CompletionIntent;
8use collections::IndexMap;
9use context_server::{ContextServer, ContextServerCommand, ContextServerId};
10use fs::{FakeFs, Fs};
11use futures::{
12 FutureExt as _, StreamExt,
13 channel::{
14 mpsc::{self, UnboundedReceiver},
15 oneshot,
16 },
17 future::{Fuse, Shared},
18};
19use gpui::{
20 App, AppContext, AsyncApp, Entity, Task, TestAppContext, UpdateGlobal,
21 http_client::FakeHttpClient,
22};
23use indoc::indoc;
24use language_model::{
25 LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent, LanguageModelId,
26 LanguageModelProviderName, LanguageModelRegistry, LanguageModelRequest,
27 LanguageModelRequestMessage, LanguageModelToolResult, LanguageModelToolSchemaFormat,
28 LanguageModelToolUse, MessageContent, Role, StopReason, fake_provider::FakeLanguageModel,
29};
30use pretty_assertions::assert_eq;
31use project::{
32 Project, context_server_store::ContextServerStore, project_settings::ProjectSettings,
33};
34use prompt_store::ProjectContext;
35use reqwest_client::ReqwestClient;
36use schemars::JsonSchema;
37use serde::{Deserialize, Serialize};
38use serde_json::json;
39use settings::{Settings, SettingsStore};
40use std::{
41 path::Path,
42 pin::Pin,
43 rc::Rc,
44 sync::{
45 Arc,
46 atomic::{AtomicBool, Ordering},
47 },
48 time::Duration,
49};
50use util::path;
51
52mod test_tools;
53use test_tools::*;
54
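/// Installs a fresh test `SettingsStore` as a global so settings-dependent code has something to read.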
55fn init_test(cx: &mut TestAppContext) {
56 cx.update(|cx| {
57 let settings_store = SettingsStore::test(cx);
58 cx.set_global(settings_store);
59 });
60}
61
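/// Fake terminal handle that records whether it was killed, exposes canned output,
/// and lets tests control when (or whether) the underlying command exits.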
62struct FakeTerminalHandle {
63 killed: Arc<AtomicBool>,
64 stopped_by_user: Arc<AtomicBool>,
65 exit_sender: std::cell::RefCell<Option<futures::channel::oneshot::Sender<()>>>,
66 wait_for_exit: Shared<Task<acp::TerminalExitStatus>>,
67 output: acp::TerminalOutputResponse,
68 id: acp::TerminalId,
69}
70
71impl FakeTerminalHandle {
72 fn new_never_exits(cx: &mut App) -> Self {
73 let killed = Arc::new(AtomicBool::new(false));
74 let stopped_by_user = Arc::new(AtomicBool::new(false));
75
76 let (exit_sender, exit_receiver) = futures::channel::oneshot::channel();
77
78 let wait_for_exit = cx
79 .spawn(async move |_cx| {
80 // Wait for the exit signal (sent when kill() is called)
81 let _ = exit_receiver.await;
82 acp::TerminalExitStatus::new()
83 })
84 .shared();
85
86 Self {
87 killed,
88 stopped_by_user,
89 exit_sender: std::cell::RefCell::new(Some(exit_sender)),
90 wait_for_exit,
91 output: acp::TerminalOutputResponse::new("partial output".to_string(), false),
92 id: acp::TerminalId::new("fake_terminal".to_string()),
93 }
94 }
95
96 fn new_with_immediate_exit(cx: &mut App, exit_code: u32) -> Self {
97 let killed = Arc::new(AtomicBool::new(false));
98 let stopped_by_user = Arc::new(AtomicBool::new(false));
99 let (exit_sender, _exit_receiver) = futures::channel::oneshot::channel();
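        // The receiver is dropped immediately: `wait_for_exit` below resolves on its own
        // with the given exit code, so a later `signal_exit()` has no effect.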
100
101 let wait_for_exit = cx
102 .spawn(async move |_cx| acp::TerminalExitStatus::new().exit_code(exit_code))
103 .shared();
104
105 Self {
106 killed,
107 stopped_by_user,
108 exit_sender: std::cell::RefCell::new(Some(exit_sender)),
109 wait_for_exit,
110 output: acp::TerminalOutputResponse::new("command output".to_string(), false),
111 id: acp::TerminalId::new("fake_terminal".to_string()),
112 }
113 }
114
115 fn was_killed(&self) -> bool {
116 self.killed.load(Ordering::SeqCst)
117 }
118
119 fn set_stopped_by_user(&self, stopped: bool) {
120 self.stopped_by_user.store(stopped, Ordering::SeqCst);
121 }
122
123 fn signal_exit(&self) {
124 if let Some(sender) = self.exit_sender.borrow_mut().take() {
125 let _ = sender.send(());
126 }
127 }
128}
129
130impl crate::TerminalHandle for FakeTerminalHandle {
131 fn id(&self, _cx: &AsyncApp) -> Result<acp::TerminalId> {
132 Ok(self.id.clone())
133 }
134
135 fn current_output(&self, _cx: &AsyncApp) -> Result<acp::TerminalOutputResponse> {
136 Ok(self.output.clone())
137 }
138
139 fn wait_for_exit(&self, _cx: &AsyncApp) -> Result<Shared<Task<acp::TerminalExitStatus>>> {
140 Ok(self.wait_for_exit.clone())
141 }
142
143 fn kill(&self, _cx: &AsyncApp) -> Result<()> {
144 self.killed.store(true, Ordering::SeqCst);
145 self.signal_exit();
146 Ok(())
147 }
148
149 fn was_stopped_by_user(&self, _cx: &AsyncApp) -> Result<bool> {
150 Ok(self.stopped_by_user.load(Ordering::SeqCst))
151 }
152}
153
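/// Environment that always hands out the same pre-built terminal handle, so tests can
/// inspect it (e.g. whether it was killed) after the tool has run.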
154struct FakeThreadEnvironment {
155 handle: Rc<FakeTerminalHandle>,
156}
157
158impl crate::ThreadEnvironment for FakeThreadEnvironment {
159 fn create_terminal(
160 &self,
161 _command: String,
162 _cwd: Option<std::path::PathBuf>,
163 _output_byte_limit: Option<u64>,
164 _cx: &mut AsyncApp,
165 ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
166 Task::ready(Ok(self.handle.clone() as Rc<dyn crate::TerminalHandle>))
167 }
168}
169
170/// Environment that creates multiple independent terminal handles for testing concurrent terminals.
171struct MultiTerminalEnvironment {
172 handles: std::cell::RefCell<Vec<Rc<FakeTerminalHandle>>>,
173}
174
175impl MultiTerminalEnvironment {
176 fn new() -> Self {
177 Self {
178 handles: std::cell::RefCell::new(Vec::new()),
179 }
180 }
181
182 fn handles(&self) -> Vec<Rc<FakeTerminalHandle>> {
183 self.handles.borrow().clone()
184 }
185}
186
187impl crate::ThreadEnvironment for MultiTerminalEnvironment {
188 fn create_terminal(
189 &self,
190 _command: String,
191 _cwd: Option<std::path::PathBuf>,
192 _output_byte_limit: Option<u64>,
193 cx: &mut AsyncApp,
194 ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
195 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
196 self.handles.borrow_mut().push(handle.clone());
197 Task::ready(Ok(handle as Rc<dyn crate::TerminalHandle>))
198 }
199}
200
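/// Enables `always_allow_tool_actions` so tool calls run without permission prompts.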
201fn always_allow_tools(cx: &mut TestAppContext) {
202 cx.update(|cx| {
203 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
204 settings.always_allow_tool_actions = true;
205 agent_settings::AgentSettings::override_global(settings, cx);
206 });
207}
208
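// Most fake-model tests follow the same shape: send a user message, run the executor
// until it parks, stream events from the fake model, end the stream, then collect the
// thread events and assert on the resulting messages.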
209#[gpui::test]
210async fn test_echo(cx: &mut TestAppContext) {
211 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
212 let fake_model = model.as_fake();
213
214 let events = thread
215 .update(cx, |thread, cx| {
216 thread.send(UserMessageId::new(), ["Testing: Reply with 'Hello'"], cx)
217 })
218 .unwrap();
219 cx.run_until_parked();
220 fake_model.send_last_completion_stream_text_chunk("Hello");
221 fake_model
222 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
223 fake_model.end_last_completion_stream();
224
225 let events = events.collect().await;
226 thread.update(cx, |thread, _cx| {
227 assert_eq!(
228 thread.last_message().unwrap().to_markdown(),
229 indoc! {"
230 ## Assistant
231
232 Hello
233 "}
234 )
235 });
236 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
237}
238
239#[gpui::test]
240async fn test_terminal_tool_timeout_kills_handle(cx: &mut TestAppContext) {
241 init_test(cx);
242 always_allow_tools(cx);
243
244 let fs = FakeFs::new(cx.executor());
245 let project = Project::test(fs, [], cx).await;
246
247 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
248 let environment = Rc::new(FakeThreadEnvironment {
249 handle: handle.clone(),
250 });
251
252 #[allow(clippy::arc_with_non_send_sync)]
253 let tool = Arc::new(crate::TerminalTool::new(project, environment));
254 let (event_stream, mut rx) = crate::ToolCallEventStream::test();
255
256 let task = cx.update(|cx| {
257 tool.run(
258 crate::TerminalToolInput {
259 command: "sleep 1000".to_string(),
260 cd: ".".to_string(),
261 timeout_ms: Some(5),
262 },
263 event_stream,
264 cx,
265 )
266 });
267
268 let update = rx.expect_update_fields().await;
269 assert!(
270 update.content.iter().any(|blocks| {
271 blocks
272 .iter()
273 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
274 }),
275 "expected tool call update to include terminal content"
276 );
277
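    // Poll the task by hand, interleaving `run_until_parked`, so the tool's timeout can
    // fire on the test executor instead of blocking on a single await.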
278 let mut task_future: Pin<Box<Fuse<Task<Result<String>>>>> = Box::pin(task.fuse());
279
280 let deadline = std::time::Instant::now() + Duration::from_millis(500);
281 loop {
282 if let Some(result) = task_future.as_mut().now_or_never() {
283 let result = result.expect("terminal tool task should complete");
284
285 assert!(
286 handle.was_killed(),
287 "expected terminal handle to be killed on timeout"
288 );
289 assert!(
290 result.contains("partial output"),
291 "expected result to include terminal output, got: {result}"
292 );
293 return;
294 }
295
296 if std::time::Instant::now() >= deadline {
297 panic!("timed out waiting for terminal tool task to complete");
298 }
299
300 cx.run_until_parked();
301 cx.background_executor.timer(Duration::from_millis(1)).await;
302 }
303}
304
305#[gpui::test]
306#[ignore]
307async fn test_terminal_tool_without_timeout_does_not_kill_handle(cx: &mut TestAppContext) {
308 init_test(cx);
309 always_allow_tools(cx);
310
311 let fs = FakeFs::new(cx.executor());
312 let project = Project::test(fs, [], cx).await;
313
314 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
315 let environment = Rc::new(FakeThreadEnvironment {
316 handle: handle.clone(),
317 });
318
319 #[allow(clippy::arc_with_non_send_sync)]
320 let tool = Arc::new(crate::TerminalTool::new(project, environment));
321 let (event_stream, mut rx) = crate::ToolCallEventStream::test();
322
323 let _task = cx.update(|cx| {
324 tool.run(
325 crate::TerminalToolInput {
326 command: "sleep 1000".to_string(),
327 cd: ".".to_string(),
328 timeout_ms: None,
329 },
330 event_stream,
331 cx,
332 )
333 });
334
335 let update = rx.expect_update_fields().await;
336 assert!(
337 update.content.iter().any(|blocks| {
338 blocks
339 .iter()
340 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
341 }),
342 "expected tool call update to include terminal content"
343 );
344
345 smol::Timer::after(Duration::from_millis(25)).await;
346
347 assert!(
348 !handle.was_killed(),
349 "did not expect terminal handle to be killed without a timeout"
350 );
351}
352
353#[gpui::test]
354async fn test_thinking(cx: &mut TestAppContext) {
355 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
356 let fake_model = model.as_fake();
357
358 let events = thread
359 .update(cx, |thread, cx| {
360 thread.send(
361 UserMessageId::new(),
362 [indoc! {"
363 Testing:
364
365 Generate a thinking step where you just think the word 'Think',
366 and have your final answer be 'Hello'
367 "}],
368 cx,
369 )
370 })
371 .unwrap();
372 cx.run_until_parked();
373 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Thinking {
374 text: "Think".to_string(),
375 signature: None,
376 });
377 fake_model.send_last_completion_stream_text_chunk("Hello");
378 fake_model
379 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
380 fake_model.end_last_completion_stream();
381
382 let events = events.collect().await;
383 thread.update(cx, |thread, _cx| {
384 assert_eq!(
385 thread.last_message().unwrap().to_markdown(),
386 indoc! {"
387 ## Assistant
388
389 <think>Think</think>
390 Hello
391 "}
392 )
393 });
394 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
395}
396
397#[gpui::test]
398async fn test_system_prompt(cx: &mut TestAppContext) {
399 let ThreadTest {
400 model,
401 thread,
402 project_context,
403 ..
404 } = setup(cx, TestModel::Fake).await;
405 let fake_model = model.as_fake();
406
407 project_context.update(cx, |project_context, _cx| {
408 project_context.shell = "test-shell".into()
409 });
410 thread.update(cx, |thread, _| thread.add_tool(EchoTool));
411 thread
412 .update(cx, |thread, cx| {
413 thread.send(UserMessageId::new(), ["abc"], cx)
414 })
415 .unwrap();
416 cx.run_until_parked();
417 let mut pending_completions = fake_model.pending_completions();
418 assert_eq!(
419 pending_completions.len(),
420 1,
421 "unexpected pending completions: {:?}",
422 pending_completions
423 );
424
425 let pending_completion = pending_completions.pop().unwrap();
426 assert_eq!(pending_completion.messages[0].role, Role::System);
427
428 let system_message = &pending_completion.messages[0];
429 let system_prompt = system_message.content[0].to_str().unwrap();
430 assert!(
431 system_prompt.contains("test-shell"),
432 "unexpected system message: {:?}",
433 system_message
434 );
435 assert!(
436 system_prompt.contains("## Fixing Diagnostics"),
437 "unexpected system message: {:?}",
438 system_message
439 );
440}
441
442#[gpui::test]
443async fn test_system_prompt_without_tools(cx: &mut TestAppContext) {
444 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
445 let fake_model = model.as_fake();
446
447 thread
448 .update(cx, |thread, cx| {
449 thread.send(UserMessageId::new(), ["abc"], cx)
450 })
451 .unwrap();
452 cx.run_until_parked();
453 let mut pending_completions = fake_model.pending_completions();
454 assert_eq!(
455 pending_completions.len(),
456 1,
457 "unexpected pending completions: {:?}",
458 pending_completions
459 );
460
461 let pending_completion = pending_completions.pop().unwrap();
462 assert_eq!(pending_completion.messages[0].role, Role::System);
463
464 let system_message = &pending_completion.messages[0];
465 let system_prompt = system_message.content[0].to_str().unwrap();
466 assert!(
467 !system_prompt.contains("## Tool Use"),
468 "unexpected system message: {:?}",
469 system_message
470 );
471 assert!(
472 !system_prompt.contains("## Fixing Diagnostics"),
473 "unexpected system message: {:?}",
474 system_message
475 );
476}
477
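// Only the most recent message in each request should be marked `cache: true`; earlier
// messages drop back to `cache: false` on subsequent requests.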
478#[gpui::test]
479async fn test_prompt_caching(cx: &mut TestAppContext) {
480 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
481 let fake_model = model.as_fake();
482
483 // Send initial user message and verify it's cached
484 thread
485 .update(cx, |thread, cx| {
486 thread.send(UserMessageId::new(), ["Message 1"], cx)
487 })
488 .unwrap();
489 cx.run_until_parked();
490
491 let completion = fake_model.pending_completions().pop().unwrap();
492 assert_eq!(
493 completion.messages[1..],
494 vec![LanguageModelRequestMessage {
495 role: Role::User,
496 content: vec!["Message 1".into()],
497 cache: true,
498 reasoning_details: None,
499 }]
500 );
501 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
502 "Response to Message 1".into(),
503 ));
504 fake_model.end_last_completion_stream();
505 cx.run_until_parked();
506
507 // Send another user message and verify only the latest is cached
508 thread
509 .update(cx, |thread, cx| {
510 thread.send(UserMessageId::new(), ["Message 2"], cx)
511 })
512 .unwrap();
513 cx.run_until_parked();
514
515 let completion = fake_model.pending_completions().pop().unwrap();
516 assert_eq!(
517 completion.messages[1..],
518 vec![
519 LanguageModelRequestMessage {
520 role: Role::User,
521 content: vec!["Message 1".into()],
522 cache: false,
523 reasoning_details: None,
524 },
525 LanguageModelRequestMessage {
526 role: Role::Assistant,
527 content: vec!["Response to Message 1".into()],
528 cache: false,
529 reasoning_details: None,
530 },
531 LanguageModelRequestMessage {
532 role: Role::User,
533 content: vec!["Message 2".into()],
534 cache: true,
535 reasoning_details: None,
536 }
537 ]
538 );
539 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
540 "Response to Message 2".into(),
541 ));
542 fake_model.end_last_completion_stream();
543 cx.run_until_parked();
544
545 // Simulate a tool call and verify that the latest tool result is cached
546 thread.update(cx, |thread, _| thread.add_tool(EchoTool));
547 thread
548 .update(cx, |thread, cx| {
549 thread.send(UserMessageId::new(), ["Use the echo tool"], cx)
550 })
551 .unwrap();
552 cx.run_until_parked();
553
554 let tool_use = LanguageModelToolUse {
555 id: "tool_1".into(),
556 name: EchoTool::name().into(),
557 raw_input: json!({"text": "test"}).to_string(),
558 input: json!({"text": "test"}),
559 is_input_complete: true,
560 thought_signature: None,
561 };
562 fake_model
563 .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
564 fake_model.end_last_completion_stream();
565 cx.run_until_parked();
566
567 let completion = fake_model.pending_completions().pop().unwrap();
568 let tool_result = LanguageModelToolResult {
569 tool_use_id: "tool_1".into(),
570 tool_name: EchoTool::name().into(),
571 is_error: false,
572 content: "test".into(),
573 output: Some("test".into()),
574 };
575 assert_eq!(
576 completion.messages[1..],
577 vec![
578 LanguageModelRequestMessage {
579 role: Role::User,
580 content: vec!["Message 1".into()],
581 cache: false,
582 reasoning_details: None,
583 },
584 LanguageModelRequestMessage {
585 role: Role::Assistant,
586 content: vec!["Response to Message 1".into()],
587 cache: false,
588 reasoning_details: None,
589 },
590 LanguageModelRequestMessage {
591 role: Role::User,
592 content: vec!["Message 2".into()],
593 cache: false,
594 reasoning_details: None,
595 },
596 LanguageModelRequestMessage {
597 role: Role::Assistant,
598 content: vec!["Response to Message 2".into()],
599 cache: false,
600 reasoning_details: None,
601 },
602 LanguageModelRequestMessage {
603 role: Role::User,
604 content: vec!["Use the echo tool".into()],
605 cache: false,
606 reasoning_details: None,
607 },
608 LanguageModelRequestMessage {
609 role: Role::Assistant,
610 content: vec![MessageContent::ToolUse(tool_use)],
611 cache: false,
612 reasoning_details: None,
613 },
614 LanguageModelRequestMessage {
615 role: Role::User,
616 content: vec![MessageContent::ToolResult(tool_result)],
617 cache: true,
618 reasoning_details: None,
619 }
620 ]
621 );
622}
623
624#[gpui::test]
625#[cfg_attr(not(feature = "e2e"), ignore)]
626async fn test_basic_tool_calls(cx: &mut TestAppContext) {
627 let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;
628
629 // Test a tool call that's likely to complete *before* streaming stops.
630 let events = thread
631 .update(cx, |thread, cx| {
632 thread.add_tool(EchoTool);
633 thread.send(
634 UserMessageId::new(),
635 ["Now test the echo tool with 'Hello'. Does it work? Say 'Yes' or 'No'."],
636 cx,
637 )
638 })
639 .unwrap()
640 .collect()
641 .await;
642 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
643
    // Test a tool call that's likely to complete *after* streaming stops.
645 let events = thread
646 .update(cx, |thread, cx| {
647 thread.remove_tool(&EchoTool::name());
648 thread.add_tool(DelayTool);
649 thread.send(
650 UserMessageId::new(),
651 [
652 "Now call the delay tool with 200ms.",
653 "When the timer goes off, then you echo the output of the tool.",
654 ],
655 cx,
656 )
657 })
658 .unwrap()
659 .collect()
660 .await;
661 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
662 thread.update(cx, |thread, _cx| {
663 assert!(
664 thread
665 .last_message()
666 .unwrap()
667 .as_agent_message()
668 .unwrap()
669 .content
670 .iter()
671 .any(|content| {
672 if let AgentMessageContent::Text(text) = content {
673 text.contains("Ding")
674 } else {
675 false
676 }
677 }),
678 "{}",
679 thread.to_markdown()
680 );
681 });
682}
683
684#[gpui::test]
685#[cfg_attr(not(feature = "e2e"), ignore)]
686async fn test_streaming_tool_calls(cx: &mut TestAppContext) {
687 let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;
688
    // Test a tool call whose input streams in incrementally over several chunks.
690 let mut events = thread
691 .update(cx, |thread, cx| {
692 thread.add_tool(WordListTool);
693 thread.send(UserMessageId::new(), ["Test the word_list tool."], cx)
694 })
695 .unwrap();
696
697 let mut saw_partial_tool_use = false;
698 while let Some(event) = events.next().await {
699 if let Ok(ThreadEvent::ToolCall(tool_call)) = event {
700 thread.update(cx, |thread, _cx| {
701 // Look for a tool use in the thread's last message
702 let message = thread.last_message().unwrap();
703 let agent_message = message.as_agent_message().unwrap();
704 let last_content = agent_message.content.last().unwrap();
705 if let AgentMessageContent::ToolUse(last_tool_use) = last_content {
706 assert_eq!(last_tool_use.name.as_ref(), "word_list");
707 if tool_call.status == acp::ToolCallStatus::Pending {
708 if !last_tool_use.is_input_complete
709 && last_tool_use.input.get("g").is_none()
710 {
711 saw_partial_tool_use = true;
712 }
713 } else {
714 last_tool_use
715 .input
716 .get("a")
717 .expect("'a' has streamed because input is now complete");
718 last_tool_use
719 .input
720 .get("g")
721 .expect("'g' has streamed because input is now complete");
722 }
723 } else {
724 panic!("last content should be a tool use");
725 }
726 });
727 }
728 }
729
730 assert!(
731 saw_partial_tool_use,
732 "should see at least one partially streamed tool use in the history"
733 );
734}
735
736#[gpui::test]
737async fn test_tool_authorization(cx: &mut TestAppContext) {
738 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
739 let fake_model = model.as_fake();
740
741 let mut events = thread
742 .update(cx, |thread, cx| {
743 thread.add_tool(ToolRequiringPermission);
744 thread.send(UserMessageId::new(), ["abc"], cx)
745 })
746 .unwrap();
747 cx.run_until_parked();
748 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
749 LanguageModelToolUse {
750 id: "tool_id_1".into(),
751 name: ToolRequiringPermission::name().into(),
752 raw_input: "{}".into(),
753 input: json!({}),
754 is_input_complete: true,
755 thought_signature: None,
756 },
757 ));
758 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
759 LanguageModelToolUse {
760 id: "tool_id_2".into(),
761 name: ToolRequiringPermission::name().into(),
762 raw_input: "{}".into(),
763 input: json!({}),
764 is_input_complete: true,
765 thought_signature: None,
766 },
767 ));
768 fake_model.end_last_completion_stream();
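    // Both tool calls should request authorization. The offered options are ordered
    // [AllowAlways, AllowOnce, RejectOnce], as asserted in `next_tool_call_authorization`.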
769 let tool_call_auth_1 = next_tool_call_authorization(&mut events).await;
770 let tool_call_auth_2 = next_tool_call_authorization(&mut events).await;
771
772 // Approve the first
773 tool_call_auth_1
774 .response
775 .send(tool_call_auth_1.options[1].option_id.clone())
776 .unwrap();
777 cx.run_until_parked();
778
779 // Reject the second
780 tool_call_auth_2
781 .response
        .send(tool_call_auth_2.options[2].option_id.clone())
783 .unwrap();
784 cx.run_until_parked();
785
786 let completion = fake_model.pending_completions().pop().unwrap();
787 let message = completion.messages.last().unwrap();
788 assert_eq!(
789 message.content,
790 vec![
791 language_model::MessageContent::ToolResult(LanguageModelToolResult {
792 tool_use_id: tool_call_auth_1.tool_call.tool_call_id.0.to_string().into(),
793 tool_name: ToolRequiringPermission::name().into(),
794 is_error: false,
795 content: "Allowed".into(),
796 output: Some("Allowed".into())
797 }),
798 language_model::MessageContent::ToolResult(LanguageModelToolResult {
799 tool_use_id: tool_call_auth_2.tool_call.tool_call_id.0.to_string().into(),
800 tool_name: ToolRequiringPermission::name().into(),
801 is_error: true,
802 content: "Permission to run tool denied by user".into(),
803 output: Some("Permission to run tool denied by user".into())
804 })
805 ]
806 );
807
808 // Simulate yet another tool call.
809 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
810 LanguageModelToolUse {
811 id: "tool_id_3".into(),
812 name: ToolRequiringPermission::name().into(),
813 raw_input: "{}".into(),
814 input: json!({}),
815 is_input_complete: true,
816 thought_signature: None,
817 },
818 ));
819 fake_model.end_last_completion_stream();
820
821 // Respond by always allowing tools.
822 let tool_call_auth_3 = next_tool_call_authorization(&mut events).await;
823 tool_call_auth_3
824 .response
825 .send(tool_call_auth_3.options[0].option_id.clone())
826 .unwrap();
827 cx.run_until_parked();
828 let completion = fake_model.pending_completions().pop().unwrap();
829 let message = completion.messages.last().unwrap();
830 assert_eq!(
831 message.content,
832 vec![language_model::MessageContent::ToolResult(
833 LanguageModelToolResult {
834 tool_use_id: tool_call_auth_3.tool_call.tool_call_id.0.to_string().into(),
835 tool_name: ToolRequiringPermission::name().into(),
836 is_error: false,
837 content: "Allowed".into(),
838 output: Some("Allowed".into())
839 }
840 )]
841 );
842
843 // Simulate a final tool call, ensuring we don't trigger authorization.
844 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
845 LanguageModelToolUse {
846 id: "tool_id_4".into(),
847 name: ToolRequiringPermission::name().into(),
848 raw_input: "{}".into(),
849 input: json!({}),
850 is_input_complete: true,
851 thought_signature: None,
852 },
853 ));
854 fake_model.end_last_completion_stream();
855 cx.run_until_parked();
856 let completion = fake_model.pending_completions().pop().unwrap();
857 let message = completion.messages.last().unwrap();
858 assert_eq!(
859 message.content,
860 vec![language_model::MessageContent::ToolResult(
861 LanguageModelToolResult {
862 tool_use_id: "tool_id_4".into(),
863 tool_name: ToolRequiringPermission::name().into(),
864 is_error: false,
865 content: "Allowed".into(),
866 output: Some("Allowed".into())
867 }
868 )]
869 );
870}
871
872#[gpui::test]
873async fn test_tool_hallucination(cx: &mut TestAppContext) {
874 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
875 let fake_model = model.as_fake();
876
877 let mut events = thread
878 .update(cx, |thread, cx| {
879 thread.send(UserMessageId::new(), ["abc"], cx)
880 })
881 .unwrap();
882 cx.run_until_parked();
883 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
884 LanguageModelToolUse {
885 id: "tool_id_1".into(),
886 name: "nonexistent_tool".into(),
887 raw_input: "{}".into(),
888 input: json!({}),
889 is_input_complete: true,
890 thought_signature: None,
891 },
892 ));
893 fake_model.end_last_completion_stream();
894
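    // The hallucinated tool should still surface as a tool call, then immediately be marked failed.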
895 let tool_call = expect_tool_call(&mut events).await;
896 assert_eq!(tool_call.title, "nonexistent_tool");
897 assert_eq!(tool_call.status, acp::ToolCallStatus::Pending);
898 let update = expect_tool_call_update_fields(&mut events).await;
899 assert_eq!(update.fields.status, Some(acp::ToolCallStatus::Failed));
900}
901
902#[gpui::test]
903async fn test_resume_after_tool_use_limit(cx: &mut TestAppContext) {
904 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
905 let fake_model = model.as_fake();
906
907 let events = thread
908 .update(cx, |thread, cx| {
909 thread.add_tool(EchoTool);
910 thread.send(UserMessageId::new(), ["abc"], cx)
911 })
912 .unwrap();
913 cx.run_until_parked();
914 let tool_use = LanguageModelToolUse {
915 id: "tool_id_1".into(),
916 name: EchoTool::name().into(),
917 raw_input: "{}".into(),
918 input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
919 is_input_complete: true,
920 thought_signature: None,
921 };
922 fake_model
923 .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
924 fake_model.end_last_completion_stream();
925
926 cx.run_until_parked();
927 let completion = fake_model.pending_completions().pop().unwrap();
928 let tool_result = LanguageModelToolResult {
929 tool_use_id: "tool_id_1".into(),
930 tool_name: EchoTool::name().into(),
931 is_error: false,
932 content: "def".into(),
933 output: Some("def".into()),
934 };
935 assert_eq!(
936 completion.messages[1..],
937 vec![
938 LanguageModelRequestMessage {
939 role: Role::User,
940 content: vec!["abc".into()],
941 cache: false,
942 reasoning_details: None,
943 },
944 LanguageModelRequestMessage {
945 role: Role::Assistant,
946 content: vec![MessageContent::ToolUse(tool_use.clone())],
947 cache: false,
948 reasoning_details: None,
949 },
950 LanguageModelRequestMessage {
951 role: Role::User,
952 content: vec![MessageContent::ToolResult(tool_result.clone())],
953 cache: true,
954 reasoning_details: None,
955 },
956 ]
957 );
958
959 // Simulate reaching tool use limit.
960 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
961 fake_model.end_last_completion_stream();
962 let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
963 assert!(
964 last_event
965 .unwrap_err()
966 .is::<language_model::ToolUseLimitReachedError>()
967 );
968
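    // Resuming after the limit should replay the conversation and append a
    // "Continue where you left off" user message.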
969 let events = thread.update(cx, |thread, cx| thread.resume(cx)).unwrap();
970 cx.run_until_parked();
971 let completion = fake_model.pending_completions().pop().unwrap();
972 assert_eq!(
973 completion.messages[1..],
974 vec![
975 LanguageModelRequestMessage {
976 role: Role::User,
977 content: vec!["abc".into()],
978 cache: false,
979 reasoning_details: None,
980 },
981 LanguageModelRequestMessage {
982 role: Role::Assistant,
983 content: vec![MessageContent::ToolUse(tool_use)],
984 cache: false,
985 reasoning_details: None,
986 },
987 LanguageModelRequestMessage {
988 role: Role::User,
989 content: vec![MessageContent::ToolResult(tool_result)],
990 cache: false,
991 reasoning_details: None,
992 },
993 LanguageModelRequestMessage {
994 role: Role::User,
995 content: vec!["Continue where you left off".into()],
996 cache: true,
997 reasoning_details: None,
998 }
999 ]
1000 );
1001
1002 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text("Done".into()));
1003 fake_model.end_last_completion_stream();
1004 events.collect::<Vec<_>>().await;
1005 thread.read_with(cx, |thread, _cx| {
1006 assert_eq!(
1007 thread.last_message().unwrap().to_markdown(),
1008 indoc! {"
1009 ## Assistant
1010
1011 Done
1012 "}
1013 )
1014 });
1015}
1016
1017#[gpui::test]
1018async fn test_send_after_tool_use_limit(cx: &mut TestAppContext) {
1019 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
1020 let fake_model = model.as_fake();
1021
1022 let events = thread
1023 .update(cx, |thread, cx| {
1024 thread.add_tool(EchoTool);
1025 thread.send(UserMessageId::new(), ["abc"], cx)
1026 })
1027 .unwrap();
1028 cx.run_until_parked();
1029
1030 let tool_use = LanguageModelToolUse {
1031 id: "tool_id_1".into(),
1032 name: EchoTool::name().into(),
1033 raw_input: "{}".into(),
1034 input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
1035 is_input_complete: true,
1036 thought_signature: None,
1037 };
1038 let tool_result = LanguageModelToolResult {
1039 tool_use_id: "tool_id_1".into(),
1040 tool_name: EchoTool::name().into(),
1041 is_error: false,
1042 content: "def".into(),
1043 output: Some("def".into()),
1044 };
1045 fake_model
1046 .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
1047 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
1048 fake_model.end_last_completion_stream();
1049 let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
1050 assert!(
1051 last_event
1052 .unwrap_err()
1053 .is::<language_model::ToolUseLimitReachedError>()
1054 );
1055
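    // Sending a fresh user message after hitting the limit should continue the conversation
    // without the synthetic "Continue where you left off" message.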
1056 thread
1057 .update(cx, |thread, cx| {
1058 thread.send(UserMessageId::new(), vec!["ghi"], cx)
1059 })
1060 .unwrap();
1061 cx.run_until_parked();
1062 let completion = fake_model.pending_completions().pop().unwrap();
1063 assert_eq!(
1064 completion.messages[1..],
1065 vec![
1066 LanguageModelRequestMessage {
1067 role: Role::User,
1068 content: vec!["abc".into()],
1069 cache: false,
1070 reasoning_details: None,
1071 },
1072 LanguageModelRequestMessage {
1073 role: Role::Assistant,
1074 content: vec![MessageContent::ToolUse(tool_use)],
1075 cache: false,
1076 reasoning_details: None,
1077 },
1078 LanguageModelRequestMessage {
1079 role: Role::User,
1080 content: vec![MessageContent::ToolResult(tool_result)],
1081 cache: false,
1082 reasoning_details: None,
1083 },
1084 LanguageModelRequestMessage {
1085 role: Role::User,
1086 content: vec!["ghi".into()],
1087 cache: true,
1088 reasoning_details: None,
1089 }
1090 ]
1091 );
1092}
1093
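/// Waits for the next `ThreadEvent::ToolCall`, panicking on any other event.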
1094async fn expect_tool_call(events: &mut UnboundedReceiver<Result<ThreadEvent>>) -> acp::ToolCall {
1095 let event = events
1096 .next()
1097 .await
1098 .expect("no tool call authorization event received")
1099 .unwrap();
1100 match event {
1101 ThreadEvent::ToolCall(tool_call) => tool_call,
1102 event => {
1103 panic!("Unexpected event {event:?}");
1104 }
1105 }
1106}
1107
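/// Waits for the next tool call field-update event, panicking on any other event.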
1108async fn expect_tool_call_update_fields(
1109 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1110) -> acp::ToolCallUpdate {
1111 let event = events
1112 .next()
1113 .await
1114 .expect("no tool call authorization event received")
1115 .unwrap();
1116 match event {
1117 ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(update)) => update,
1118 event => {
1119 panic!("Unexpected event {event:?}");
1120 }
1121 }
1122}
1123
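/// Skips ahead to the next authorization request, asserting that the offered permission
/// options are [AllowAlways, AllowOnce, RejectOnce].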
1124async fn next_tool_call_authorization(
1125 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1126) -> ToolCallAuthorization {
1127 loop {
1128 let event = events
1129 .next()
1130 .await
1131 .expect("no tool call authorization event received")
1132 .unwrap();
1133 if let ThreadEvent::ToolCallAuthorization(tool_call_authorization) = event {
1134 let permission_kinds = tool_call_authorization
1135 .options
1136 .iter()
1137 .map(|o| o.kind)
1138 .collect::<Vec<_>>();
1139 assert_eq!(
1140 permission_kinds,
1141 vec![
1142 acp::PermissionOptionKind::AllowAlways,
1143 acp::PermissionOptionKind::AllowOnce,
1144 acp::PermissionOptionKind::RejectOnce,
1145 ]
1146 );
1147 return tool_call_authorization;
1148 }
1149 }
1150}
1151
1152#[gpui::test]
1153#[cfg_attr(not(feature = "e2e"), ignore)]
1154async fn test_concurrent_tool_calls(cx: &mut TestAppContext) {
1155 let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;
1156
1157 // Test concurrent tool calls with different delay times
1158 let events = thread
1159 .update(cx, |thread, cx| {
1160 thread.add_tool(DelayTool);
1161 thread.send(
1162 UserMessageId::new(),
1163 [
1164 "Call the delay tool twice in the same message.",
1165 "Once with 100ms. Once with 300ms.",
1166 "When both timers are complete, describe the outputs.",
1167 ],
1168 cx,
1169 )
1170 })
1171 .unwrap()
1172 .collect()
1173 .await;
1174
1175 let stop_reasons = stop_events(events);
1176 assert_eq!(stop_reasons, vec![acp::StopReason::EndTurn]);
1177
1178 thread.update(cx, |thread, _cx| {
1179 let last_message = thread.last_message().unwrap();
1180 let agent_message = last_message.as_agent_message().unwrap();
1181 let text = agent_message
1182 .content
1183 .iter()
1184 .filter_map(|content| {
1185 if let AgentMessageContent::Text(text) = content {
1186 Some(text.as_str())
1187 } else {
1188 None
1189 }
1190 })
1191 .collect::<String>();
1192
1193 assert!(text.contains("Ding"));
1194 });
1195}
1196
1197#[gpui::test]
1198async fn test_profiles(cx: &mut TestAppContext) {
1199 let ThreadTest {
1200 model, thread, fs, ..
1201 } = setup(cx, TestModel::Fake).await;
1202 let fake_model = model.as_fake();
1203
1204 thread.update(cx, |thread, _cx| {
1205 thread.add_tool(DelayTool);
1206 thread.add_tool(EchoTool);
1207 thread.add_tool(InfiniteTool);
1208 });
1209
1210 // Override profiles and wait for settings to be loaded.
1211 fs.insert_file(
1212 paths::settings_file(),
1213 json!({
1214 "agent": {
1215 "profiles": {
1216 "test-1": {
1217 "name": "Test Profile 1",
1218 "tools": {
1219 EchoTool::name(): true,
1220 DelayTool::name(): true,
1221 }
1222 },
1223 "test-2": {
1224 "name": "Test Profile 2",
1225 "tools": {
1226 InfiniteTool::name(): true,
1227 }
1228 }
1229 }
1230 }
1231 })
1232 .to_string()
1233 .into_bytes(),
1234 )
1235 .await;
1236 cx.run_until_parked();
1237
    // Verify that the test-1 profile exposes the echo and delay tools.
1239 thread
1240 .update(cx, |thread, cx| {
1241 thread.set_profile(AgentProfileId("test-1".into()), cx);
1242 thread.send(UserMessageId::new(), ["test"], cx)
1243 })
1244 .unwrap();
1245 cx.run_until_parked();
1246
1247 let mut pending_completions = fake_model.pending_completions();
1248 assert_eq!(pending_completions.len(), 1);
1249 let completion = pending_completions.pop().unwrap();
1250 let tool_names: Vec<String> = completion
1251 .tools
1252 .iter()
1253 .map(|tool| tool.name.clone())
1254 .collect();
1255 assert_eq!(tool_names, vec![DelayTool::name(), EchoTool::name()]);
1256 fake_model.end_last_completion_stream();
1257
1258 // Switch to test-2 profile, and verify that it has only the infinite tool.
1259 thread
1260 .update(cx, |thread, cx| {
1261 thread.set_profile(AgentProfileId("test-2".into()), cx);
1262 thread.send(UserMessageId::new(), ["test2"], cx)
1263 })
1264 .unwrap();
1265 cx.run_until_parked();
1266 let mut pending_completions = fake_model.pending_completions();
1267 assert_eq!(pending_completions.len(), 1);
1268 let completion = pending_completions.pop().unwrap();
1269 let tool_names: Vec<String> = completion
1270 .tools
1271 .iter()
1272 .map(|tool| tool.name.clone())
1273 .collect();
1274 assert_eq!(tool_names, vec![InfiniteTool::name()]);
1275}
1276
1277#[gpui::test]
1278async fn test_mcp_tools(cx: &mut TestAppContext) {
1279 let ThreadTest {
1280 model,
1281 thread,
1282 context_server_store,
1283 fs,
1284 ..
1285 } = setup(cx, TestModel::Fake).await;
1286 let fake_model = model.as_fake();
1287
1288 // Override profiles and wait for settings to be loaded.
1289 fs.insert_file(
1290 paths::settings_file(),
1291 json!({
1292 "agent": {
1293 "always_allow_tool_actions": true,
1294 "profiles": {
1295 "test": {
1296 "name": "Test Profile",
1297 "enable_all_context_servers": true,
1298 "tools": {
1299 EchoTool::name(): true,
1300 }
1301 },
1302 }
1303 }
1304 })
1305 .to_string()
1306 .into_bytes(),
1307 )
1308 .await;
1309 cx.run_until_parked();
1310 thread.update(cx, |thread, cx| {
1311 thread.set_profile(AgentProfileId("test".into()), cx)
1312 });
1313
1314 let mut mcp_tool_calls = setup_context_server(
1315 "test_server",
1316 vec![context_server::types::Tool {
1317 name: "echo".into(),
1318 description: None,
1319 input_schema: serde_json::to_value(EchoTool::input_schema(
1320 LanguageModelToolSchemaFormat::JsonSchema,
1321 ))
1322 .unwrap(),
1323 output_schema: None,
1324 annotations: None,
1325 }],
1326 &context_server_store,
1327 cx,
1328 );
1329
1330 let events = thread.update(cx, |thread, cx| {
1331 thread.send(UserMessageId::new(), ["Hey"], cx).unwrap()
1332 });
1333 cx.run_until_parked();
1334
1335 // Simulate the model calling the MCP tool.
1336 let completion = fake_model.pending_completions().pop().unwrap();
1337 assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
1338 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1339 LanguageModelToolUse {
1340 id: "tool_1".into(),
1341 name: "echo".into(),
1342 raw_input: json!({"text": "test"}).to_string(),
1343 input: json!({"text": "test"}),
1344 is_input_complete: true,
1345 thought_signature: None,
1346 },
1347 ));
1348 fake_model.end_last_completion_stream();
1349 cx.run_until_parked();
1350
1351 let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
1352 assert_eq!(tool_call_params.name, "echo");
1353 assert_eq!(tool_call_params.arguments, Some(json!({"text": "test"})));
1354 tool_call_response
1355 .send(context_server::types::CallToolResponse {
1356 content: vec![context_server::types::ToolResponseContent::Text {
1357 text: "test".into(),
1358 }],
1359 is_error: None,
1360 meta: None,
1361 structured_content: None,
1362 })
1363 .unwrap();
1364 cx.run_until_parked();
1365
1366 assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
1367 fake_model.send_last_completion_stream_text_chunk("Done!");
1368 fake_model.end_last_completion_stream();
1369 events.collect::<Vec<_>>().await;
1370
1371 // Send again after adding the echo tool, ensuring the name collision is resolved.
1372 let events = thread.update(cx, |thread, cx| {
1373 thread.add_tool(EchoTool);
1374 thread.send(UserMessageId::new(), ["Go"], cx).unwrap()
1375 });
1376 cx.run_until_parked();
1377 let completion = fake_model.pending_completions().pop().unwrap();
1378 assert_eq!(
1379 tool_names_for_completion(&completion),
1380 vec!["echo", "test_server_echo"]
1381 );
1382 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1383 LanguageModelToolUse {
1384 id: "tool_2".into(),
1385 name: "test_server_echo".into(),
1386 raw_input: json!({"text": "mcp"}).to_string(),
1387 input: json!({"text": "mcp"}),
1388 is_input_complete: true,
1389 thought_signature: None,
1390 },
1391 ));
1392 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1393 LanguageModelToolUse {
1394 id: "tool_3".into(),
1395 name: "echo".into(),
1396 raw_input: json!({"text": "native"}).to_string(),
1397 input: json!({"text": "native"}),
1398 is_input_complete: true,
1399 thought_signature: None,
1400 },
1401 ));
1402 fake_model.end_last_completion_stream();
1403 cx.run_until_parked();
1404
1405 let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
1406 assert_eq!(tool_call_params.name, "echo");
1407 assert_eq!(tool_call_params.arguments, Some(json!({"text": "mcp"})));
1408 tool_call_response
1409 .send(context_server::types::CallToolResponse {
1410 content: vec![context_server::types::ToolResponseContent::Text { text: "mcp".into() }],
1411 is_error: None,
1412 meta: None,
1413 structured_content: None,
1414 })
1415 .unwrap();
1416 cx.run_until_parked();
1417
1418 // Ensure the tool results were inserted with the correct names.
1419 let completion = fake_model.pending_completions().pop().unwrap();
1420 assert_eq!(
1421 completion.messages.last().unwrap().content,
1422 vec![
1423 MessageContent::ToolResult(LanguageModelToolResult {
1424 tool_use_id: "tool_3".into(),
1425 tool_name: "echo".into(),
1426 is_error: false,
1427 content: "native".into(),
1428 output: Some("native".into()),
1429 },),
1430 MessageContent::ToolResult(LanguageModelToolResult {
1431 tool_use_id: "tool_2".into(),
1432 tool_name: "test_server_echo".into(),
1433 is_error: false,
1434 content: "mcp".into(),
1435 output: Some("mcp".into()),
1436 },),
1437 ]
1438 );
1439 fake_model.end_last_completion_stream();
1440 events.collect::<Vec<_>>().await;
1441}
1442
1443#[gpui::test]
1444async fn test_mcp_tool_truncation(cx: &mut TestAppContext) {
1445 let ThreadTest {
1446 model,
1447 thread,
1448 context_server_store,
1449 fs,
1450 ..
1451 } = setup(cx, TestModel::Fake).await;
1452 let fake_model = model.as_fake();
1453
1454 // Set up a profile with all tools enabled
1455 fs.insert_file(
1456 paths::settings_file(),
1457 json!({
1458 "agent": {
1459 "profiles": {
1460 "test": {
1461 "name": "Test Profile",
1462 "enable_all_context_servers": true,
1463 "tools": {
1464 EchoTool::name(): true,
1465 DelayTool::name(): true,
1466 WordListTool::name(): true,
1467 ToolRequiringPermission::name(): true,
1468 InfiniteTool::name(): true,
1469 }
1470 },
1471 }
1472 }
1473 })
1474 .to_string()
1475 .into_bytes(),
1476 )
1477 .await;
1478 cx.run_until_parked();
1479
1480 thread.update(cx, |thread, cx| {
1481 thread.set_profile(AgentProfileId("test".into()), cx);
1482 thread.add_tool(EchoTool);
1483 thread.add_tool(DelayTool);
1484 thread.add_tool(WordListTool);
1485 thread.add_tool(ToolRequiringPermission);
1486 thread.add_tool(InfiniteTool);
1487 });
1488
1489 // Set up multiple context servers with some overlapping tool names
1490 let _server1_calls = setup_context_server(
1491 "xxx",
1492 vec![
1493 context_server::types::Tool {
1494 name: "echo".into(), // Conflicts with native EchoTool
1495 description: None,
1496 input_schema: serde_json::to_value(EchoTool::input_schema(
1497 LanguageModelToolSchemaFormat::JsonSchema,
1498 ))
1499 .unwrap(),
1500 output_schema: None,
1501 annotations: None,
1502 },
1503 context_server::types::Tool {
1504 name: "unique_tool_1".into(),
1505 description: None,
1506 input_schema: json!({"type": "object", "properties": {}}),
1507 output_schema: None,
1508 annotations: None,
1509 },
1510 ],
1511 &context_server_store,
1512 cx,
1513 );
1514
1515 let _server2_calls = setup_context_server(
1516 "yyy",
1517 vec![
1518 context_server::types::Tool {
1519 name: "echo".into(), // Also conflicts with native EchoTool
1520 description: None,
1521 input_schema: serde_json::to_value(EchoTool::input_schema(
1522 LanguageModelToolSchemaFormat::JsonSchema,
1523 ))
1524 .unwrap(),
1525 output_schema: None,
1526 annotations: None,
1527 },
1528 context_server::types::Tool {
1529 name: "unique_tool_2".into(),
1530 description: None,
1531 input_schema: json!({"type": "object", "properties": {}}),
1532 output_schema: None,
1533 annotations: None,
1534 },
1535 context_server::types::Tool {
1536 name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
1537 description: None,
1538 input_schema: json!({"type": "object", "properties": {}}),
1539 output_schema: None,
1540 annotations: None,
1541 },
1542 context_server::types::Tool {
1543 name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
1544 description: None,
1545 input_schema: json!({"type": "object", "properties": {}}),
1546 output_schema: None,
1547 annotations: None,
1548 },
1549 ],
1550 &context_server_store,
1551 cx,
1552 );
1553 let _server3_calls = setup_context_server(
1554 "zzz",
1555 vec![
1556 context_server::types::Tool {
1557 name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
1558 description: None,
1559 input_schema: json!({"type": "object", "properties": {}}),
1560 output_schema: None,
1561 annotations: None,
1562 },
1563 context_server::types::Tool {
1564 name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
1565 description: None,
1566 input_schema: json!({"type": "object", "properties": {}}),
1567 output_schema: None,
1568 annotations: None,
1569 },
1570 context_server::types::Tool {
1571 name: "c".repeat(MAX_TOOL_NAME_LENGTH + 1),
1572 description: None,
1573 input_schema: json!({"type": "object", "properties": {}}),
1574 output_schema: None,
1575 annotations: None,
1576 },
1577 ],
1578 &context_server_store,
1579 cx,
1580 );
1581
1582 thread
1583 .update(cx, |thread, cx| {
1584 thread.send(UserMessageId::new(), ["Go"], cx)
1585 })
1586 .unwrap();
1587 cx.run_until_parked();
1588 let completion = fake_model.pending_completions().pop().unwrap();
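    // The expected names below show how tools from different servers are disambiguated
    // when names collide or run up against MAX_TOOL_NAME_LENGTH.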
1589 assert_eq!(
1590 tool_names_for_completion(&completion),
1591 vec![
1592 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
1593 "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
1594 "delay",
1595 "echo",
1596 "infinite",
1597 "tool_requiring_permission",
1598 "unique_tool_1",
1599 "unique_tool_2",
1600 "word_list",
1601 "xxx_echo",
1602 "y_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1603 "yyy_echo",
1604 "z_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1605 ]
1606 );
1607}
1608
1609#[gpui::test]
1610#[cfg_attr(not(feature = "e2e"), ignore)]
1611async fn test_cancellation(cx: &mut TestAppContext) {
1612 let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;
1613
1614 let mut events = thread
1615 .update(cx, |thread, cx| {
1616 thread.add_tool(InfiniteTool);
1617 thread.add_tool(EchoTool);
1618 thread.send(
1619 UserMessageId::new(),
1620 ["Call the echo tool, then call the infinite tool, then explain their output"],
1621 cx,
1622 )
1623 })
1624 .unwrap();
1625
1626 // Wait until both tools are called.
1627 let mut expected_tools = vec!["Echo", "Infinite Tool"];
1628 let mut echo_id = None;
1629 let mut echo_completed = false;
1630 while let Some(event) = events.next().await {
1631 match event.unwrap() {
1632 ThreadEvent::ToolCall(tool_call) => {
1633 assert_eq!(tool_call.title, expected_tools.remove(0));
1634 if tool_call.title == "Echo" {
1635 echo_id = Some(tool_call.tool_call_id);
1636 }
1637 }
1638 ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
1639 acp::ToolCallUpdate {
1640 tool_call_id,
1641 fields:
1642 acp::ToolCallUpdateFields {
1643 status: Some(acp::ToolCallStatus::Completed),
1644 ..
1645 },
1646 ..
1647 },
1648 )) if Some(&tool_call_id) == echo_id.as_ref() => {
1649 echo_completed = true;
1650 }
1651 _ => {}
1652 }
1653
1654 if expected_tools.is_empty() && echo_completed {
1655 break;
1656 }
1657 }
1658
1659 // Cancel the current send and ensure that the event stream is closed, even
1660 // if one of the tools is still running.
1661 thread.update(cx, |thread, cx| thread.cancel(cx)).await;
1662 let events = events.collect::<Vec<_>>().await;
1663 let last_event = events.last();
1664 assert!(
1665 matches!(
1666 last_event,
1667 Some(Ok(ThreadEvent::Stop(acp::StopReason::Cancelled)))
1668 ),
1669 "unexpected event {last_event:?}"
1670 );
1671
1672 // Ensure we can still send a new message after cancellation.
1673 let events = thread
1674 .update(cx, |thread, cx| {
1675 thread.send(
1676 UserMessageId::new(),
1677 ["Testing: reply with 'Hello' then stop."],
1678 cx,
1679 )
1680 })
1681 .unwrap()
1682 .collect::<Vec<_>>()
1683 .await;
1684 thread.update(cx, |thread, _cx| {
1685 let message = thread.last_message().unwrap();
1686 let agent_message = message.as_agent_message().unwrap();
1687 assert_eq!(
1688 agent_message.content,
1689 vec![AgentMessageContent::Text("Hello".to_string())]
1690 );
1691 });
1692 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
1693}
1694
1695#[gpui::test]
1696async fn test_terminal_tool_cancellation_captures_output(cx: &mut TestAppContext) {
1697 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
1698 always_allow_tools(cx);
1699 let fake_model = model.as_fake();
1700
1701 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
1702 let environment = Rc::new(FakeThreadEnvironment {
1703 handle: handle.clone(),
1704 });
1705
1706 let mut events = thread
1707 .update(cx, |thread, cx| {
1708 thread.add_tool(crate::TerminalTool::new(
1709 thread.project().clone(),
1710 environment,
1711 ));
1712 thread.send(UserMessageId::new(), ["run a command"], cx)
1713 })
1714 .unwrap();
1715
1716 cx.run_until_parked();
1717
1718 // Simulate the model calling the terminal tool
1719 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1720 LanguageModelToolUse {
1721 id: "terminal_tool_1".into(),
1722 name: "terminal".into(),
1723 raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
1724 input: json!({"command": "sleep 1000", "cd": "."}),
1725 is_input_complete: true,
1726 thought_signature: None,
1727 },
1728 ));
1729 fake_model.end_last_completion_stream();
1730
1731 // Wait for the terminal tool to start running
1732 wait_for_terminal_tool_started(&mut events, cx).await;
1733
1734 // Cancel the thread while the terminal is running
1735 thread.update(cx, |thread, cx| thread.cancel(cx)).detach();
1736
1737 // Collect remaining events, driving the executor to let cancellation complete
1738 let remaining_events = collect_events_until_stop(&mut events, cx).await;
1739
1740 // Verify the terminal was killed
1741 assert!(
1742 handle.was_killed(),
1743 "expected terminal handle to be killed on cancellation"
1744 );
1745
1746 // Verify we got a cancellation stop event
1747 assert_eq!(
1748 stop_events(remaining_events),
1749 vec![acp::StopReason::Cancelled],
1750 );
1751
1752 // Verify the tool result contains the terminal output, not just "Tool canceled by user"
1753 thread.update(cx, |thread, _cx| {
1754 let message = thread.last_message().unwrap();
1755 let agent_message = message.as_agent_message().unwrap();
1756
1757 let tool_use = agent_message
1758 .content
1759 .iter()
1760 .find_map(|content| match content {
1761 AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
1762 _ => None,
1763 })
1764 .expect("expected tool use in agent message");
1765
1766 let tool_result = agent_message
1767 .tool_results
1768 .get(&tool_use.id)
1769 .expect("expected tool result");
1770
1771 let result_text = match &tool_result.content {
1772 language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
1773 _ => panic!("expected text content in tool result"),
1774 };
1775
1776 // "partial output" comes from FakeTerminalHandle's output field
1777 assert!(
1778 result_text.contains("partial output"),
1779 "expected tool result to contain terminal output, got: {result_text}"
1780 );
1781 // Match the actual format from process_content in terminal_tool.rs
1782 assert!(
1783 result_text.contains("The user stopped this command"),
1784 "expected tool result to indicate user stopped, got: {result_text}"
1785 );
1786 });
1787
1788 // Verify we can send a new message after cancellation
1789 verify_thread_recovery(&thread, &fake_model, cx).await;
1790}
1791
1792/// Helper to verify thread can recover after cancellation by sending a simple message.
1793async fn verify_thread_recovery(
1794 thread: &Entity<Thread>,
1795 fake_model: &FakeLanguageModel,
1796 cx: &mut TestAppContext,
1797) {
1798 let events = thread
1799 .update(cx, |thread, cx| {
1800 thread.send(
1801 UserMessageId::new(),
1802 ["Testing: reply with 'Hello' then stop."],
1803 cx,
1804 )
1805 })
1806 .unwrap();
1807 cx.run_until_parked();
1808 fake_model.send_last_completion_stream_text_chunk("Hello");
1809 fake_model
1810 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
1811 fake_model.end_last_completion_stream();
1812
1813 let events = events.collect::<Vec<_>>().await;
1814 thread.update(cx, |thread, _cx| {
1815 let message = thread.last_message().unwrap();
1816 let agent_message = message.as_agent_message().unwrap();
1817 assert_eq!(
1818 agent_message.content,
1819 vec![AgentMessageContent::Text("Hello".to_string())]
1820 );
1821 });
1822 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
1823}
1824
1825/// Waits for a terminal tool to start by watching for a ToolCallUpdate with terminal content.
1826async fn wait_for_terminal_tool_started(
1827 events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
1828 cx: &mut TestAppContext,
1829) {
    let deadline = cx.executor().num_cpus() * 100; // Iteration budget, scaled with available parallelism
1831 for _ in 0..deadline {
1832 cx.run_until_parked();
1833
1834 while let Some(Some(event)) = events.next().now_or_never() {
1835 if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
1836 update,
1837 ))) = &event
1838 {
1839 if update.fields.content.as_ref().is_some_and(|content| {
1840 content
1841 .iter()
1842 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
1843 }) {
1844 return;
1845 }
1846 }
1847 }
1848
1849 cx.background_executor
1850 .timer(Duration::from_millis(10))
1851 .await;
1852 }
1853 panic!("terminal tool did not start within the expected time");
1854}
1855
1856/// Collects events until a Stop event is received, driving the executor to completion.
1857async fn collect_events_until_stop(
1858 events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
1859 cx: &mut TestAppContext,
1860) -> Vec<Result<ThreadEvent>> {
1861 let mut collected = Vec::new();
1862 let deadline = cx.executor().num_cpus() * 200;
1863
1864 for _ in 0..deadline {
1865 cx.executor().advance_clock(Duration::from_millis(10));
1866 cx.run_until_parked();
1867
1868 while let Some(Some(event)) = events.next().now_or_never() {
1869 let is_stop = matches!(&event, Ok(ThreadEvent::Stop(_)));
1870 collected.push(event);
1871 if is_stop {
1872 return collected;
1873 }
1874 }
1875 }
1876 panic!(
1877 "did not receive Stop event within the expected time; collected {} events",
1878 collected.len()
1879 );
1880}
1881
1882#[gpui::test]
1883async fn test_truncate_while_terminal_tool_running(cx: &mut TestAppContext) {
1884 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
1885 always_allow_tools(cx);
1886 let fake_model = model.as_fake();
1887
1888 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
1889 let environment = Rc::new(FakeThreadEnvironment {
1890 handle: handle.clone(),
1891 });
1892
1893 let message_id = UserMessageId::new();
1894 let mut events = thread
1895 .update(cx, |thread, cx| {
1896 thread.add_tool(crate::TerminalTool::new(
1897 thread.project().clone(),
1898 environment,
1899 ));
1900 thread.send(message_id.clone(), ["run a command"], cx)
1901 })
1902 .unwrap();
1903
1904 cx.run_until_parked();
1905
1906 // Simulate the model calling the terminal tool
1907 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1908 LanguageModelToolUse {
1909 id: "terminal_tool_1".into(),
1910 name: "terminal".into(),
1911 raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
1912 input: json!({"command": "sleep 1000", "cd": "."}),
1913 is_input_complete: true,
1914 thought_signature: None,
1915 },
1916 ));
1917 fake_model.end_last_completion_stream();
1918
1919 // Wait for the terminal tool to start running
1920 wait_for_terminal_tool_started(&mut events, cx).await;
1921
1922 // Truncate the thread while the terminal is running
1923 thread
1924 .update(cx, |thread, cx| thread.truncate(message_id, cx))
1925 .unwrap();
1926
1927 // Drive the executor to let cancellation complete
1928 let _ = collect_events_until_stop(&mut events, cx).await;
1929
1930 // Verify the terminal was killed
1931 assert!(
1932 handle.was_killed(),
1933 "expected terminal handle to be killed on truncate"
1934 );
1935
1936 // Verify the thread is empty after truncation
1937 thread.update(cx, |thread, _cx| {
1938 assert_eq!(
1939 thread.to_markdown(),
1940 "",
1941 "expected thread to be empty after truncating the only message"
1942 );
1943 });
1944
1945 // Verify we can send a new message after truncation
1946 verify_thread_recovery(&thread, &fake_model, cx).await;
1947}
1948
1949#[gpui::test]
1950async fn test_cancel_multiple_concurrent_terminal_tools(cx: &mut TestAppContext) {
1951 // Tests that cancellation properly kills all running terminal tools when multiple are active.
1952 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
1953 always_allow_tools(cx);
1954 let fake_model = model.as_fake();
1955
1956 let environment = Rc::new(MultiTerminalEnvironment::new());
1957
1958 let mut events = thread
1959 .update(cx, |thread, cx| {
1960 thread.add_tool(crate::TerminalTool::new(
1961 thread.project().clone(),
1962 environment.clone(),
1963 ));
1964 thread.send(UserMessageId::new(), ["run multiple commands"], cx)
1965 })
1966 .unwrap();
1967
1968 cx.run_until_parked();
1969
1970 // Simulate the model calling two terminal tools
1971 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1972 LanguageModelToolUse {
1973 id: "terminal_tool_1".into(),
1974 name: "terminal".into(),
1975 raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
1976 input: json!({"command": "sleep 1000", "cd": "."}),
1977 is_input_complete: true,
1978 thought_signature: None,
1979 },
1980 ));
1981 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1982 LanguageModelToolUse {
1983 id: "terminal_tool_2".into(),
1984 name: "terminal".into(),
1985 raw_input: r#"{"command": "sleep 2000", "cd": "."}"#.into(),
1986 input: json!({"command": "sleep 2000", "cd": "."}),
1987 is_input_complete: true,
1988 thought_signature: None,
1989 },
1990 ));
1991 fake_model.end_last_completion_stream();
1992
1993 // Wait for both terminal tools to start by counting terminal content updates
1994 let mut terminals_started = 0;
    let max_polls = cx.executor().num_cpus() * 100;
    for _ in 0..max_polls {
1997 cx.run_until_parked();
1998
1999 while let Some(Some(event)) = events.next().now_or_never() {
2000 if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
2001 update,
2002 ))) = &event
2003 {
2004 if update.fields.content.as_ref().is_some_and(|content| {
2005 content
2006 .iter()
2007 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
2008 }) {
2009 terminals_started += 1;
2010 if terminals_started >= 2 {
2011 break;
2012 }
2013 }
2014 }
2015 }
2016 if terminals_started >= 2 {
2017 break;
2018 }
2019
2020 cx.background_executor
2021 .timer(Duration::from_millis(10))
2022 .await;
2023 }
2024 assert!(
2025 terminals_started >= 2,
2026 "expected 2 terminal tools to start, got {terminals_started}"
2027 );
2028
2029 // Cancel the thread while both terminals are running
2030 thread.update(cx, |thread, cx| thread.cancel(cx)).detach();
2031
2032 // Collect remaining events
2033 let remaining_events = collect_events_until_stop(&mut events, cx).await;
2034
2035 // Verify both terminal handles were killed
2036 let handles = environment.handles();
2037 assert_eq!(
2038 handles.len(),
2039 2,
2040 "expected 2 terminal handles to be created"
2041 );
2042 assert!(
2043 handles[0].was_killed(),
2044 "expected first terminal handle to be killed on cancellation"
2045 );
2046 assert!(
2047 handles[1].was_killed(),
2048 "expected second terminal handle to be killed on cancellation"
2049 );
2050
2051 // Verify we got a cancellation stop event
2052 assert_eq!(
2053 stop_events(remaining_events),
2054 vec![acp::StopReason::Cancelled],
2055 );
2056}
2057
2058#[gpui::test]
2059async fn test_terminal_tool_stopped_via_terminal_card_button(cx: &mut TestAppContext) {
2060 // Tests that clicking the stop button on the terminal card (as opposed to the main
2061 // cancel button) properly reports user stopped via the was_stopped_by_user path.
2062 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2063 always_allow_tools(cx);
2064 let fake_model = model.as_fake();
2065
2066 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
2067 let environment = Rc::new(FakeThreadEnvironment {
2068 handle: handle.clone(),
2069 });
2070
2071 let mut events = thread
2072 .update(cx, |thread, cx| {
2073 thread.add_tool(crate::TerminalTool::new(
2074 thread.project().clone(),
2075 environment,
2076 ));
2077 thread.send(UserMessageId::new(), ["run a command"], cx)
2078 })
2079 .unwrap();
2080
2081 cx.run_until_parked();
2082
2083 // Simulate the model calling the terminal tool
2084 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2085 LanguageModelToolUse {
2086 id: "terminal_tool_1".into(),
2087 name: "terminal".into(),
2088 raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
2089 input: json!({"command": "sleep 1000", "cd": "."}),
2090 is_input_complete: true,
2091 thought_signature: None,
2092 },
2093 ));
2094 fake_model.end_last_completion_stream();
2095
2096 // Wait for the terminal tool to start running
2097 wait_for_terminal_tool_started(&mut events, cx).await;
2098
2099 // Simulate user clicking stop on the terminal card itself.
2100 // This sets the flag and signals exit (simulating what the real UI would do).
2101 handle.set_stopped_by_user(true);
2102 handle.killed.store(true, Ordering::SeqCst);
2103 handle.signal_exit();
2104
2105 // Wait for the tool to complete
2106 cx.run_until_parked();
2107
2108 // The thread continues after tool completion - simulate the model ending its turn
2109 fake_model
2110 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2111 fake_model.end_last_completion_stream();
2112
2113 // Collect remaining events
2114 let remaining_events = collect_events_until_stop(&mut events, cx).await;
2115
2116 // Verify we got an EndTurn (not Cancelled, since we didn't cancel the thread)
2117 assert_eq!(
2118 stop_events(remaining_events),
2119 vec![acp::StopReason::EndTurn],
2120 );
2121
2122 // Verify the tool result indicates user stopped
2123 thread.update(cx, |thread, _cx| {
2124 let message = thread.last_message().unwrap();
2125 let agent_message = message.as_agent_message().unwrap();
2126
2127 let tool_use = agent_message
2128 .content
2129 .iter()
2130 .find_map(|content| match content {
2131 AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
2132 _ => None,
2133 })
2134 .expect("expected tool use in agent message");
2135
2136 let tool_result = agent_message
2137 .tool_results
2138 .get(&tool_use.id)
2139 .expect("expected tool result");
2140
2141 let result_text = match &tool_result.content {
2142 language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
2143 _ => panic!("expected text content in tool result"),
2144 };
2145
2146 assert!(
2147 result_text.contains("The user stopped this command"),
2148 "expected tool result to indicate user stopped, got: {result_text}"
2149 );
2150 });
2151}
2152
2153#[gpui::test]
2154async fn test_terminal_tool_timeout_expires(cx: &mut TestAppContext) {
2155 // Tests that when a timeout is configured and expires, the tool result indicates timeout.
2156 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2157 always_allow_tools(cx);
2158 let fake_model = model.as_fake();
2159
2160 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
2161 let environment = Rc::new(FakeThreadEnvironment {
2162 handle: handle.clone(),
2163 });
2164
2165 let mut events = thread
2166 .update(cx, |thread, cx| {
2167 thread.add_tool(crate::TerminalTool::new(
2168 thread.project().clone(),
2169 environment,
2170 ));
2171 thread.send(UserMessageId::new(), ["run a command with timeout"], cx)
2172 })
2173 .unwrap();
2174
2175 cx.run_until_parked();
2176
2177 // Simulate the model calling the terminal tool with a short timeout
2178 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2179 LanguageModelToolUse {
2180 id: "terminal_tool_1".into(),
2181 name: "terminal".into(),
2182 raw_input: r#"{"command": "sleep 1000", "cd": ".", "timeout_ms": 100}"#.into(),
2183 input: json!({"command": "sleep 1000", "cd": ".", "timeout_ms": 100}),
2184 is_input_complete: true,
2185 thought_signature: None,
2186 },
2187 ));
2188 fake_model.end_last_completion_stream();
2189
2190 // Wait for the terminal tool to start running
2191 wait_for_terminal_tool_started(&mut events, cx).await;
2192
2193 // Advance clock past the timeout
2194 cx.executor().advance_clock(Duration::from_millis(200));
2195 cx.run_until_parked();
2196
2197 // The thread continues after tool completion - simulate the model ending its turn
2198 fake_model
2199 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2200 fake_model.end_last_completion_stream();
2201
2202 // Collect remaining events
2203 let remaining_events = collect_events_until_stop(&mut events, cx).await;
2204
2205 // Verify the terminal was killed due to timeout
2206 assert!(
2207 handle.was_killed(),
2208 "expected terminal handle to be killed on timeout"
2209 );
2210
2211 // Verify we got an EndTurn (the tool completed, just with timeout)
2212 assert_eq!(
2213 stop_events(remaining_events),
2214 vec![acp::StopReason::EndTurn],
2215 );
2216
2217 // Verify the tool result indicates timeout, not user stopped
2218 thread.update(cx, |thread, _cx| {
2219 let message = thread.last_message().unwrap();
2220 let agent_message = message.as_agent_message().unwrap();
2221
2222 let tool_use = agent_message
2223 .content
2224 .iter()
2225 .find_map(|content| match content {
2226 AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
2227 _ => None,
2228 })
2229 .expect("expected tool use in agent message");
2230
2231 let tool_result = agent_message
2232 .tool_results
2233 .get(&tool_use.id)
2234 .expect("expected tool result");
2235
2236 let result_text = match &tool_result.content {
2237 language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
2238 _ => panic!("expected text content in tool result"),
2239 };
2240
2241 assert!(
2242 result_text.contains("timed out"),
2243 "expected tool result to indicate timeout, got: {result_text}"
2244 );
2245 assert!(
2246 !result_text.contains("The user stopped"),
2247 "tool result should not mention user stopped when it timed out, got: {result_text}"
2248 );
2249 });
2250}
2251
2252#[gpui::test]
2253async fn test_in_progress_send_canceled_by_next_send(cx: &mut TestAppContext) {
2254 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2255 let fake_model = model.as_fake();
2256
2257 let events_1 = thread
2258 .update(cx, |thread, cx| {
2259 thread.send(UserMessageId::new(), ["Hello 1"], cx)
2260 })
2261 .unwrap();
2262 cx.run_until_parked();
2263 fake_model.send_last_completion_stream_text_chunk("Hey 1!");
2264 cx.run_until_parked();
2265
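    // Sending a second message while the first is still streaming should cancel the first turn.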
2266 let events_2 = thread
2267 .update(cx, |thread, cx| {
2268 thread.send(UserMessageId::new(), ["Hello 2"], cx)
2269 })
2270 .unwrap();
2271 cx.run_until_parked();
2272 fake_model.send_last_completion_stream_text_chunk("Hey 2!");
2273 fake_model
2274 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2275 fake_model.end_last_completion_stream();
2276
2277 let events_1 = events_1.collect::<Vec<_>>().await;
2278 assert_eq!(stop_events(events_1), vec![acp::StopReason::Cancelled]);
2279 let events_2 = events_2.collect::<Vec<_>>().await;
2280 assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
2281}
2282
2283#[gpui::test]
2284async fn test_subsequent_successful_sends_dont_cancel(cx: &mut TestAppContext) {
2285 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2286 let fake_model = model.as_fake();
2287
2288 let events_1 = thread
2289 .update(cx, |thread, cx| {
2290 thread.send(UserMessageId::new(), ["Hello 1"], cx)
2291 })
2292 .unwrap();
2293 cx.run_until_parked();
2294 fake_model.send_last_completion_stream_text_chunk("Hey 1!");
2295 fake_model
2296 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2297 fake_model.end_last_completion_stream();
2298 let events_1 = events_1.collect::<Vec<_>>().await;
2299
2300 let events_2 = thread
2301 .update(cx, |thread, cx| {
2302 thread.send(UserMessageId::new(), ["Hello 2"], cx)
2303 })
2304 .unwrap();
2305 cx.run_until_parked();
2306 fake_model.send_last_completion_stream_text_chunk("Hey 2!");
2307 fake_model
2308 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2309 fake_model.end_last_completion_stream();
2310 let events_2 = events_2.collect::<Vec<_>>().await;
2311
2312 assert_eq!(stop_events(events_1), vec![acp::StopReason::EndTurn]);
2313 assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
2314}
2315
2316#[gpui::test]
2317async fn test_refusal(cx: &mut TestAppContext) {
2318 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2319 let fake_model = model.as_fake();
2320
2321 let events = thread
2322 .update(cx, |thread, cx| {
2323 thread.send(UserMessageId::new(), ["Hello"], cx)
2324 })
2325 .unwrap();
2326 cx.run_until_parked();
2327 thread.read_with(cx, |thread, _| {
2328 assert_eq!(
2329 thread.to_markdown(),
2330 indoc! {"
2331 ## User
2332
2333 Hello
2334 "}
2335 );
2336 });
2337
2338 fake_model.send_last_completion_stream_text_chunk("Hey!");
2339 cx.run_until_parked();
2340 thread.read_with(cx, |thread, _| {
2341 assert_eq!(
2342 thread.to_markdown(),
2343 indoc! {"
2344 ## User
2345
2346 Hello
2347
2348 ## Assistant
2349
2350 Hey!
2351 "}
2352 );
2353 });
2354
    // If the model refuses to continue, the thread should roll back the refused exchange,
    // removing the last user message and everything after it.
2356 fake_model
2357 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::Refusal));
2358 let events = events.collect::<Vec<_>>().await;
2359 assert_eq!(stop_events(events), vec![acp::StopReason::Refusal]);
2360 thread.read_with(cx, |thread, _| {
2361 assert_eq!(thread.to_markdown(), "");
2362 });
2363}
2364
2365#[gpui::test]
2366async fn test_truncate_first_message(cx: &mut TestAppContext) {
2367 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2368 let fake_model = model.as_fake();
2369
2370 let message_id = UserMessageId::new();
2371 thread
2372 .update(cx, |thread, cx| {
2373 thread.send(message_id.clone(), ["Hello"], cx)
2374 })
2375 .unwrap();
2376 cx.run_until_parked();
2377 thread.read_with(cx, |thread, _| {
2378 assert_eq!(
2379 thread.to_markdown(),
2380 indoc! {"
2381 ## User
2382
2383 Hello
2384 "}
2385 );
2386 assert_eq!(thread.latest_token_usage(), None);
2387 });
2388
2389 fake_model.send_last_completion_stream_text_chunk("Hey!");
2390 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
2391 language_model::TokenUsage {
2392 input_tokens: 32_000,
2393 output_tokens: 16_000,
2394 cache_creation_input_tokens: 0,
2395 cache_read_input_tokens: 0,
2396 },
2397 ));
2398 cx.run_until_parked();
2399 thread.read_with(cx, |thread, _| {
2400 assert_eq!(
2401 thread.to_markdown(),
2402 indoc! {"
2403 ## User
2404
2405 Hello
2406
2407 ## Assistant
2408
2409 Hey!
2410 "}
2411 );
2412 assert_eq!(
2413 thread.latest_token_usage(),
2414 Some(acp_thread::TokenUsage {
2415 used_tokens: 32_000 + 16_000,
2416 max_tokens: 1_000_000,
2417 output_tokens: 16_000,
2418 })
2419 );
2420 });
2421
2422 thread
2423 .update(cx, |thread, cx| thread.truncate(message_id, cx))
2424 .unwrap();
2425 cx.run_until_parked();
2426 thread.read_with(cx, |thread, _| {
2427 assert_eq!(thread.to_markdown(), "");
2428 assert_eq!(thread.latest_token_usage(), None);
2429 });
2430
2431 // Ensure we can still send a new message after truncation.
2432 thread
2433 .update(cx, |thread, cx| {
2434 thread.send(UserMessageId::new(), ["Hi"], cx)
2435 })
2436 .unwrap();
2437 thread.update(cx, |thread, _cx| {
2438 assert_eq!(
2439 thread.to_markdown(),
2440 indoc! {"
2441 ## User
2442
2443 Hi
2444 "}
2445 );
2446 });
2447 cx.run_until_parked();
2448 fake_model.send_last_completion_stream_text_chunk("Ahoy!");
2449 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
2450 language_model::TokenUsage {
2451 input_tokens: 40_000,
2452 output_tokens: 20_000,
2453 cache_creation_input_tokens: 0,
2454 cache_read_input_tokens: 0,
2455 },
2456 ));
2457 cx.run_until_parked();
2458 thread.read_with(cx, |thread, _| {
2459 assert_eq!(
2460 thread.to_markdown(),
2461 indoc! {"
2462 ## User
2463
2464 Hi
2465
2466 ## Assistant
2467
2468 Ahoy!
2469 "}
2470 );
2471
2472 assert_eq!(
2473 thread.latest_token_usage(),
2474 Some(acp_thread::TokenUsage {
2475 used_tokens: 40_000 + 20_000,
2476 max_tokens: 1_000_000,
2477 output_tokens: 20_000,
2478 })
2479 );
2480 });
2481}
2482
2483#[gpui::test]
2484async fn test_truncate_second_message(cx: &mut TestAppContext) {
2485 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2486 let fake_model = model.as_fake();
2487
2488 thread
2489 .update(cx, |thread, cx| {
2490 thread.send(UserMessageId::new(), ["Message 1"], cx)
2491 })
2492 .unwrap();
2493 cx.run_until_parked();
2494 fake_model.send_last_completion_stream_text_chunk("Message 1 response");
2495 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
2496 language_model::TokenUsage {
2497 input_tokens: 32_000,
2498 output_tokens: 16_000,
2499 cache_creation_input_tokens: 0,
2500 cache_read_input_tokens: 0,
2501 },
2502 ));
2503 fake_model.end_last_completion_stream();
2504 cx.run_until_parked();
2505
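    // Capture the expected post-first-exchange state so it can be re-asserted after truncation.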
2506 let assert_first_message_state = |cx: &mut TestAppContext| {
2507 thread.clone().read_with(cx, |thread, _| {
2508 assert_eq!(
2509 thread.to_markdown(),
2510 indoc! {"
2511 ## User
2512
2513 Message 1
2514
2515 ## Assistant
2516
2517 Message 1 response
2518 "}
2519 );
2520
2521 assert_eq!(
2522 thread.latest_token_usage(),
2523 Some(acp_thread::TokenUsage {
2524 used_tokens: 32_000 + 16_000,
2525 max_tokens: 1_000_000,
2526 output_tokens: 16_000,
2527 })
2528 );
2529 });
2530 };
2531
2532 assert_first_message_state(cx);
2533
2534 let second_message_id = UserMessageId::new();
2535 thread
2536 .update(cx, |thread, cx| {
2537 thread.send(second_message_id.clone(), ["Message 2"], cx)
2538 })
2539 .unwrap();
2540 cx.run_until_parked();
2541
2542 fake_model.send_last_completion_stream_text_chunk("Message 2 response");
2543 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
2544 language_model::TokenUsage {
2545 input_tokens: 40_000,
2546 output_tokens: 20_000,
2547 cache_creation_input_tokens: 0,
2548 cache_read_input_tokens: 0,
2549 },
2550 ));
2551 fake_model.end_last_completion_stream();
2552 cx.run_until_parked();
2553
2554 thread.read_with(cx, |thread, _| {
2555 assert_eq!(
2556 thread.to_markdown(),
2557 indoc! {"
2558 ## User
2559
2560 Message 1
2561
2562 ## Assistant
2563
2564 Message 1 response
2565
2566 ## User
2567
2568 Message 2
2569
2570 ## Assistant
2571
2572 Message 2 response
2573 "}
2574 );
2575
2576 assert_eq!(
2577 thread.latest_token_usage(),
2578 Some(acp_thread::TokenUsage {
2579 used_tokens: 40_000 + 20_000,
2580 max_tokens: 1_000_000,
2581 output_tokens: 20_000,
2582 })
2583 );
2584 });
2585
2586 thread
2587 .update(cx, |thread, cx| thread.truncate(second_message_id, cx))
2588 .unwrap();
2589 cx.run_until_parked();
2590
2591 assert_first_message_state(cx);
2592}
2593
2594#[gpui::test]
2595async fn test_title_generation(cx: &mut TestAppContext) {
2596 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2597 let fake_model = model.as_fake();
2598
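    // Use a dedicated fake model for summarization so title generation can be driven separately.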
2599 let summary_model = Arc::new(FakeLanguageModel::default());
2600 thread.update(cx, |thread, cx| {
2601 thread.set_summarization_model(Some(summary_model.clone()), cx)
2602 });
2603
2604 let send = thread
2605 .update(cx, |thread, cx| {
2606 thread.send(UserMessageId::new(), ["Hello"], cx)
2607 })
2608 .unwrap();
2609 cx.run_until_parked();
2610
2611 fake_model.send_last_completion_stream_text_chunk("Hey!");
2612 fake_model.end_last_completion_stream();
2613 cx.run_until_parked();
2614 thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "New Thread"));
2615
    // Simulate the summary model streaming a title; only the first line should be used.
2617 summary_model.send_last_completion_stream_text_chunk("Hello ");
2618 summary_model.send_last_completion_stream_text_chunk("world\nG");
2619 summary_model.send_last_completion_stream_text_chunk("oodnight Moon");
2620 summary_model.end_last_completion_stream();
2621 send.collect::<Vec<_>>().await;
2622 cx.run_until_parked();
2623 thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));
2624
2625 // Send another message, ensuring no title is generated this time.
2626 let send = thread
2627 .update(cx, |thread, cx| {
2628 thread.send(UserMessageId::new(), ["Hello again"], cx)
2629 })
2630 .unwrap();
2631 cx.run_until_parked();
2632 fake_model.send_last_completion_stream_text_chunk("Hey again!");
2633 fake_model.end_last_completion_stream();
2634 cx.run_until_parked();
2635 assert_eq!(summary_model.pending_completions(), Vec::new());
2636 send.collect::<Vec<_>>().await;
2637 thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));
2638}
2639
2640#[gpui::test]
2641async fn test_building_request_with_pending_tools(cx: &mut TestAppContext) {
2642 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2643 let fake_model = model.as_fake();
2644
2645 let _events = thread
2646 .update(cx, |thread, cx| {
2647 thread.add_tool(ToolRequiringPermission);
2648 thread.add_tool(EchoTool);
2649 thread.send(UserMessageId::new(), ["Hey!"], cx)
2650 })
2651 .unwrap();
2652 cx.run_until_parked();
2653
2654 let permission_tool_use = LanguageModelToolUse {
2655 id: "tool_id_1".into(),
2656 name: ToolRequiringPermission::name().into(),
2657 raw_input: "{}".into(),
2658 input: json!({}),
2659 is_input_complete: true,
2660 thought_signature: None,
2661 };
2662 let echo_tool_use = LanguageModelToolUse {
2663 id: "tool_id_2".into(),
2664 name: EchoTool::name().into(),
2665 raw_input: json!({"text": "test"}).to_string(),
2666 input: json!({"text": "test"}),
2667 is_input_complete: true,
2668 thought_signature: None,
2669 };
2670 fake_model.send_last_completion_stream_text_chunk("Hi!");
2671 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2672 permission_tool_use,
2673 ));
2674 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2675 echo_tool_use.clone(),
2676 ));
2677 fake_model.end_last_completion_stream();
2678 cx.run_until_parked();
2679
2680 // Ensure pending tools are skipped when building a request.
2681 let request = thread
2682 .read_with(cx, |thread, cx| {
2683 thread.build_completion_request(CompletionIntent::EditFile, cx)
2684 })
2685 .unwrap();
2686 assert_eq!(
2687 request.messages[1..],
2688 vec![
2689 LanguageModelRequestMessage {
2690 role: Role::User,
2691 content: vec!["Hey!".into()],
2692 cache: true,
2693 reasoning_details: None,
2694 },
2695 LanguageModelRequestMessage {
2696 role: Role::Assistant,
2697 content: vec![
2698 MessageContent::Text("Hi!".into()),
2699 MessageContent::ToolUse(echo_tool_use.clone())
2700 ],
2701 cache: false,
2702 reasoning_details: None,
2703 },
2704 LanguageModelRequestMessage {
2705 role: Role::User,
2706 content: vec![MessageContent::ToolResult(LanguageModelToolResult {
2707 tool_use_id: echo_tool_use.id.clone(),
2708 tool_name: echo_tool_use.name,
2709 is_error: false,
2710 content: "test".into(),
2711 output: Some("test".into())
2712 })],
2713 cache: false,
2714 reasoning_details: None,
2715 },
2716 ],
2717 );
2718}
2719
2720#[gpui::test]
2721async fn test_agent_connection(cx: &mut TestAppContext) {
2722 cx.update(settings::init);
2723 let templates = Templates::new();
2724
2725 // Initialize language model system with test provider
2726 cx.update(|cx| {
2727 gpui_tokio::init(cx);
2728
2729 let http_client = FakeHttpClient::with_404_response();
2730 let clock = Arc::new(clock::FakeSystemClock::new());
2731 let client = Client::new(clock, http_client, cx);
2732 let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
2733 language_model::init(client.clone(), cx);
2734 language_models::init(user_store, client.clone(), cx);
2735 LanguageModelRegistry::test(cx);
2736 });
2737 cx.executor().forbid_parking();
2738
2739 // Create a project for new_thread
2740 let fake_fs = cx.update(|cx| fs::FakeFs::new(cx.background_executor().clone()));
2741 fake_fs.insert_tree(path!("/test"), json!({})).await;
2742 let project = Project::test(fake_fs.clone(), [Path::new("/test")], cx).await;
2743 let cwd = Path::new("/test");
2744 let text_thread_store =
2745 cx.new(|cx| assistant_text_thread::TextThreadStore::fake(project.clone(), cx));
2746 let history_store = cx.new(|cx| HistoryStore::new(text_thread_store, cx));
2747
2748 // Create agent and connection
2749 let agent = NativeAgent::new(
2750 project.clone(),
2751 history_store,
2752 templates.clone(),
2753 None,
2754 fake_fs.clone(),
2755 &mut cx.to_async(),
2756 )
2757 .await
2758 .unwrap();
2759 let connection = NativeAgentConnection(agent.clone());
2760
2761 // Create a thread using new_thread
2762 let connection_rc = Rc::new(connection.clone());
2763 let acp_thread = cx
2764 .update(|cx| connection_rc.new_thread(project, cwd, cx))
2765 .await
2766 .expect("new_thread should succeed");
2767
2768 // Get the session_id from the AcpThread
2769 let session_id = acp_thread.read_with(cx, |thread, _| thread.session_id().clone());
2770
2771 // Test model_selector returns Some
2772 let selector_opt = connection.model_selector(&session_id);
2773 assert!(
2774 selector_opt.is_some(),
2775 "agent should always support ModelSelector"
2776 );
2777 let selector = selector_opt.unwrap();
2778
2779 // Test list_models
2780 let listed_models = cx
2781 .update(|cx| selector.list_models(cx))
2782 .await
2783 .expect("list_models should succeed");
2784 let AgentModelList::Grouped(listed_models) = listed_models else {
2785 panic!("Unexpected model list type");
2786 };
2787 assert!(!listed_models.is_empty(), "should have at least one model");
2788 assert_eq!(
2789 listed_models[&AgentModelGroupName("Fake".into())][0]
2790 .id
2791 .0
2792 .as_ref(),
2793 "fake/fake"
2794 );
2795
2796 // Test selected_model returns the default
2797 let model = cx
2798 .update(|cx| selector.selected_model(cx))
2799 .await
2800 .expect("selected_model should succeed");
2801 let model = cx
2802 .update(|cx| agent.read(cx).models().model_from_id(&model.id))
2803 .unwrap();
2804 let model = model.as_fake();
2805 assert_eq!(model.id().0, "fake", "should return default model");
2806
2807 let request = acp_thread.update(cx, |thread, cx| thread.send(vec!["abc".into()], cx));
2808 cx.run_until_parked();
2809 model.send_last_completion_stream_text_chunk("def");
2810 cx.run_until_parked();
2811 acp_thread.read_with(cx, |thread, cx| {
2812 assert_eq!(
2813 thread.to_markdown(cx),
2814 indoc! {"
2815 ## User
2816
2817 abc
2818
2819 ## Assistant
2820
2821 def
2822
2823 "}
2824 )
2825 });
2826
2827 // Test cancel
2828 cx.update(|cx| connection.cancel(&session_id, cx));
    request.await.expect("prompt should resolve gracefully after cancellation");
2830
2831 // Ensure that dropping the ACP thread causes the native thread to be
2832 // dropped as well.
2833 cx.update(|_| drop(acp_thread));
2834 let result = cx
2835 .update(|cx| {
2836 connection.prompt(
2837 Some(acp_thread::UserMessageId::new()),
2838 acp::PromptRequest::new(session_id.clone(), vec!["ghi".into()]),
2839 cx,
2840 )
2841 })
2842 .await;
2843 assert_eq!(
2844 result.as_ref().unwrap_err().to_string(),
2845 "Session not found",
2846 "unexpected result: {:?}",
2847 result
2848 );
2849}
2850
2851#[gpui::test]
2852async fn test_tool_updates_to_completion(cx: &mut TestAppContext) {
2853 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
2854 thread.update(cx, |thread, _cx| thread.add_tool(ThinkingTool));
2855 let fake_model = model.as_fake();
2856
2857 let mut events = thread
2858 .update(cx, |thread, cx| {
2859 thread.send(UserMessageId::new(), ["Think"], cx)
2860 })
2861 .unwrap();
2862 cx.run_until_parked();
2863
2864 // Simulate streaming partial input.
2865 let input = json!({});
2866 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2867 LanguageModelToolUse {
2868 id: "1".into(),
2869 name: ThinkingTool::name().into(),
2870 raw_input: input.to_string(),
2871 input,
2872 is_input_complete: false,
2873 thought_signature: None,
2874 },
2875 ));
2876
2877 // Input streaming completed
2878 let input = json!({ "content": "Thinking hard!" });
2879 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
2880 LanguageModelToolUse {
2881 id: "1".into(),
2882 name: "thinking".into(),
2883 raw_input: input.to_string(),
2884 input,
2885 is_input_complete: true,
2886 thought_signature: None,
2887 },
2888 ));
2889 fake_model.end_last_completion_stream();
2890 cx.run_until_parked();
2891
2892 let tool_call = expect_tool_call(&mut events).await;
2893 assert_eq!(
2894 tool_call,
2895 acp::ToolCall::new("1", "Thinking")
2896 .kind(acp::ToolKind::Think)
2897 .raw_input(json!({}))
2898 .meta(acp::Meta::from_iter([(
2899 "tool_name".into(),
2900 "thinking".into()
2901 )]))
2902 );
2903 let update = expect_tool_call_update_fields(&mut events).await;
2904 assert_eq!(
2905 update,
2906 acp::ToolCallUpdate::new(
2907 "1",
2908 acp::ToolCallUpdateFields::new()
2909 .title("Thinking")
2910 .kind(acp::ToolKind::Think)
2911 .raw_input(json!({ "content": "Thinking hard!"}))
2912 )
2913 );
2914 let update = expect_tool_call_update_fields(&mut events).await;
2915 assert_eq!(
2916 update,
2917 acp::ToolCallUpdate::new(
2918 "1",
2919 acp::ToolCallUpdateFields::new().status(acp::ToolCallStatus::InProgress)
2920 )
2921 );
2922 let update = expect_tool_call_update_fields(&mut events).await;
2923 assert_eq!(
2924 update,
2925 acp::ToolCallUpdate::new(
2926 "1",
2927 acp::ToolCallUpdateFields::new().content(vec!["Thinking hard!".into()])
2928 )
2929 );
2930 let update = expect_tool_call_update_fields(&mut events).await;
2931 assert_eq!(
2932 update,
2933 acp::ToolCallUpdate::new(
2934 "1",
2935 acp::ToolCallUpdateFields::new()
2936 .status(acp::ToolCallStatus::Completed)
2937 .raw_output("Finished thinking.")
2938 )
2939 );
2940}
2941
2942#[gpui::test]
2943async fn test_send_no_retry_on_success(cx: &mut TestAppContext) {
2944 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
2945 let fake_model = model.as_fake();
2946
2947 let mut events = thread
2948 .update(cx, |thread, cx| {
2949 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
2950 thread.send(UserMessageId::new(), ["Hello!"], cx)
2951 })
2952 .unwrap();
2953 cx.run_until_parked();
2954
2955 fake_model.send_last_completion_stream_text_chunk("Hey!");
2956 fake_model.end_last_completion_stream();
2957
2958 let mut retry_events = Vec::new();
2959 while let Some(Ok(event)) = events.next().await {
2960 match event {
2961 ThreadEvent::Retry(retry_status) => {
2962 retry_events.push(retry_status);
2963 }
2964 ThreadEvent::Stop(..) => break,
2965 _ => {}
2966 }
2967 }
2968
2969 assert_eq!(retry_events.len(), 0);
2970 thread.read_with(cx, |thread, _cx| {
2971 assert_eq!(
2972 thread.to_markdown(),
2973 indoc! {"
2974 ## User
2975
2976 Hello!
2977
2978 ## Assistant
2979
2980 Hey!
2981 "}
2982 )
2983 });
2984}
2985
2986#[gpui::test]
2987async fn test_send_retry_on_error(cx: &mut TestAppContext) {
2988 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
2989 let fake_model = model.as_fake();
2990
2991 let mut events = thread
2992 .update(cx, |thread, cx| {
2993 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
2994 thread.send(UserMessageId::new(), ["Hello!"], cx)
2995 })
2996 .unwrap();
2997 cx.run_until_parked();
2998
2999 fake_model.send_last_completion_stream_text_chunk("Hey,");
3000 fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
3001 provider: LanguageModelProviderName::new("Anthropic"),
3002 retry_after: Some(Duration::from_secs(3)),
3003 });
3004 fake_model.end_last_completion_stream();
3005
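    // Advance past the provider's retry_after so the scheduled retry fires.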
3006 cx.executor().advance_clock(Duration::from_secs(3));
3007 cx.run_until_parked();
3008
3009 fake_model.send_last_completion_stream_text_chunk("there!");
3010 fake_model.end_last_completion_stream();
3011 cx.run_until_parked();
3012
3013 let mut retry_events = Vec::new();
3014 while let Some(Ok(event)) = events.next().await {
3015 match event {
3016 ThreadEvent::Retry(retry_status) => {
3017 retry_events.push(retry_status);
3018 }
3019 ThreadEvent::Stop(..) => break,
3020 _ => {}
3021 }
3022 }
3023
3024 assert_eq!(retry_events.len(), 1);
3025 assert!(matches!(
3026 retry_events[0],
3027 acp_thread::RetryStatus { attempt: 1, .. }
3028 ));
3029 thread.read_with(cx, |thread, _cx| {
3030 assert_eq!(
3031 thread.to_markdown(),
3032 indoc! {"
3033 ## User
3034
3035 Hello!
3036
3037 ## Assistant
3038
3039 Hey,
3040
3041 [resume]
3042
3043 ## Assistant
3044
3045 there!
3046 "}
3047 )
3048 });
3049}
3050
3051#[gpui::test]
3052async fn test_send_retry_finishes_tool_calls_on_error(cx: &mut TestAppContext) {
3053 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
3054 let fake_model = model.as_fake();
3055
3056 let events = thread
3057 .update(cx, |thread, cx| {
3058 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
3059 thread.add_tool(EchoTool);
3060 thread.send(UserMessageId::new(), ["Call the echo tool!"], cx)
3061 })
3062 .unwrap();
3063 cx.run_until_parked();
3064
3065 let tool_use_1 = LanguageModelToolUse {
3066 id: "tool_1".into(),
3067 name: EchoTool::name().into(),
3068 raw_input: json!({"text": "test"}).to_string(),
3069 input: json!({"text": "test"}),
3070 is_input_complete: true,
3071 thought_signature: None,
3072 };
3073 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
3074 tool_use_1.clone(),
3075 ));
3076 fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
3077 provider: LanguageModelProviderName::new("Anthropic"),
3078 retry_after: Some(Duration::from_secs(3)),
3079 });
3080 fake_model.end_last_completion_stream();
3081
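    // After the retry fires, the new request should include the tool call that completed
    // during the failed turn, along with its result.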
3082 cx.executor().advance_clock(Duration::from_secs(3));
3083 let completion = fake_model.pending_completions().pop().unwrap();
3084 assert_eq!(
3085 completion.messages[1..],
3086 vec![
3087 LanguageModelRequestMessage {
3088 role: Role::User,
3089 content: vec!["Call the echo tool!".into()],
3090 cache: false,
3091 reasoning_details: None,
3092 },
3093 LanguageModelRequestMessage {
3094 role: Role::Assistant,
3095 content: vec![language_model::MessageContent::ToolUse(tool_use_1.clone())],
3096 cache: false,
3097 reasoning_details: None,
3098 },
3099 LanguageModelRequestMessage {
3100 role: Role::User,
3101 content: vec![language_model::MessageContent::ToolResult(
3102 LanguageModelToolResult {
3103 tool_use_id: tool_use_1.id.clone(),
3104 tool_name: tool_use_1.name.clone(),
3105 is_error: false,
3106 content: "test".into(),
3107 output: Some("test".into())
3108 }
3109 )],
3110 cache: true,
3111 reasoning_details: None,
3112 },
3113 ]
3114 );
3115
3116 fake_model.send_last_completion_stream_text_chunk("Done");
3117 fake_model.end_last_completion_stream();
3118 cx.run_until_parked();
3119 events.collect::<Vec<_>>().await;
3120 thread.read_with(cx, |thread, _cx| {
3121 assert_eq!(
3122 thread.last_message(),
3123 Some(Message::Agent(AgentMessage {
3124 content: vec![AgentMessageContent::Text("Done".into())],
3125 tool_results: IndexMap::default(),
3126 reasoning_details: None,
3127 }))
3128 );
3129 })
3130}
3131
3132#[gpui::test]
3133async fn test_send_max_retries_exceeded(cx: &mut TestAppContext) {
3134 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
3135 let fake_model = model.as_fake();
3136
3137 let mut events = thread
3138 .update(cx, |thread, cx| {
3139 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
3140 thread.send(UserMessageId::new(), ["Hello!"], cx)
3141 })
3142 .unwrap();
3143 cx.run_until_parked();
3144
3145 for _ in 0..crate::thread::MAX_RETRY_ATTEMPTS + 1 {
3146 fake_model.send_last_completion_stream_error(
3147 LanguageModelCompletionError::ServerOverloaded {
3148 provider: LanguageModelProviderName::new("Anthropic"),
3149 retry_after: Some(Duration::from_secs(3)),
3150 },
3151 );
3152 fake_model.end_last_completion_stream();
3153 cx.executor().advance_clock(Duration::from_secs(3));
3154 cx.run_until_parked();
3155 }
3156
3157 let mut errors = Vec::new();
3158 let mut retry_events = Vec::new();
3159 while let Some(event) = events.next().await {
3160 match event {
3161 Ok(ThreadEvent::Retry(retry_status)) => {
3162 retry_events.push(retry_status);
3163 }
3164 Ok(ThreadEvent::Stop(..)) => break,
3165 Err(error) => errors.push(error),
3166 _ => {}
3167 }
3168 }
3169
3170 assert_eq!(
3171 retry_events.len(),
3172 crate::thread::MAX_RETRY_ATTEMPTS as usize
3173 );
3174 for i in 0..crate::thread::MAX_RETRY_ATTEMPTS as usize {
3175 assert_eq!(retry_events[i].attempt, i + 1);
3176 }
3177 assert_eq!(errors.len(), 1);
3178 let error = errors[0]
3179 .downcast_ref::<LanguageModelCompletionError>()
3180 .unwrap();
3181 assert!(matches!(
3182 error,
3183 LanguageModelCompletionError::ServerOverloaded { .. }
3184 ));
3185}
3186
/// Extracts the stop events from a list of thread events for asserting against in tests.
3188fn stop_events(result_events: Vec<Result<ThreadEvent>>) -> Vec<acp::StopReason> {
3189 result_events
3190 .into_iter()
3191 .filter_map(|event| match event.unwrap() {
3192 ThreadEvent::Stop(stop_reason) => Some(stop_reason),
3193 _ => None,
3194 })
3195 .collect()
3196}
3197
3198struct ThreadTest {
3199 model: Arc<dyn LanguageModel>,
3200 thread: Entity<Thread>,
3201 project_context: Entity<ProjectContext>,
3202 context_server_store: Entity<ContextServerStore>,
3203 fs: Arc<FakeFs>,
3204}
3205
3206enum TestModel {
3207 Sonnet4,
3208 Fake,
3209}
3210
3211impl TestModel {
3212 fn id(&self) -> LanguageModelId {
3213 match self {
3214 TestModel::Sonnet4 => LanguageModelId("claude-sonnet-4-latest".into()),
3215 TestModel::Fake => unreachable!(),
3216 }
3217 }
3218}
3219
3220async fn setup(cx: &mut TestAppContext, model: TestModel) -> ThreadTest {
3221 cx.executor().allow_parking();
3222
3223 let fs = FakeFs::new(cx.background_executor.clone());
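    // Write a settings file that enables every test tool under a dedicated profile.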
3224 fs.create_dir(paths::settings_file().parent().unwrap())
3225 .await
3226 .unwrap();
3227 fs.insert_file(
3228 paths::settings_file(),
3229 json!({
3230 "agent": {
3231 "default_profile": "test-profile",
3232 "profiles": {
3233 "test-profile": {
3234 "name": "Test Profile",
3235 "tools": {
3236 EchoTool::name(): true,
3237 DelayTool::name(): true,
3238 WordListTool::name(): true,
3239 ToolRequiringPermission::name(): true,
3240 InfiniteTool::name(): true,
3241 ThinkingTool::name(): true,
3242 "terminal": true,
3243 }
3244 }
3245 }
3246 }
3247 })
3248 .to_string()
3249 .into_bytes(),
3250 )
3251 .await;
3252
3253 cx.update(|cx| {
3254 settings::init(cx);
3255
3256 match model {
3257 TestModel::Fake => {}
3258 TestModel::Sonnet4 => {
3259 gpui_tokio::init(cx);
3260 let http_client = ReqwestClient::user_agent("agent tests").unwrap();
3261 cx.set_http_client(Arc::new(http_client));
3262 let client = Client::production(cx);
3263 let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
3264 language_model::init(client.clone(), cx);
3265 language_models::init(user_store, client.clone(), cx);
3266 }
3267 };
3268
3269 watch_settings(fs.clone(), cx);
3270 });
3271
3272 let templates = Templates::new();
3273
3274 fs.insert_tree(path!("/test"), json!({})).await;
3275 let project = Project::test(fs.clone(), [path!("/test").as_ref()], cx).await;
3276
3277 let model = cx
3278 .update(|cx| {
3279 if let TestModel::Fake = model {
3280 Task::ready(Arc::new(FakeLanguageModel::default()) as Arc<_>)
3281 } else {
3282 let model_id = model.id();
3283 let models = LanguageModelRegistry::read_global(cx);
3284 let model = models
3285 .available_models(cx)
3286 .find(|model| model.id() == model_id)
3287 .unwrap();
3288
3289 let provider = models.provider(&model.provider_id()).unwrap();
3290 let authenticated = provider.authenticate(cx);
3291
3292 cx.spawn(async move |_cx| {
3293 authenticated.await.unwrap();
3294 model
3295 })
3296 }
3297 })
3298 .await;
3299
3300 let project_context = cx.new(|_cx| ProjectContext::default());
3301 let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
3302 let context_server_registry =
3303 cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
3304 let thread = cx.new(|cx| {
3305 Thread::new(
3306 project,
3307 project_context.clone(),
3308 context_server_registry,
3309 templates,
3310 Some(model.clone()),
3311 cx,
3312 )
3313 });
3314 ThreadTest {
3315 model,
3316 thread,
3317 project_context,
3318 context_server_store,
3319 fs,
3320 }
3321}
3322
3323#[cfg(test)]
3324#[ctor::ctor]
3325fn init_logger() {
3326 if std::env::var("RUST_LOG").is_ok() {
3327 env_logger::init();
3328 }
3329}
3330
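/// Watches the settings file on the fake filesystem and applies changes to the global `SettingsStore`.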
3331fn watch_settings(fs: Arc<dyn Fs>, cx: &mut App) {
3332 let fs = fs.clone();
3333 cx.spawn({
3334 async move |cx| {
3335 let mut new_settings_content_rx = settings::watch_config_file(
3336 cx.background_executor(),
3337 fs,
3338 paths::settings_file().clone(),
3339 );
3340
3341 while let Some(new_settings_content) = new_settings_content_rx.next().await {
3342 cx.update(|cx| {
3343 SettingsStore::update_global(cx, |settings, cx| {
3344 settings.set_user_settings(&new_settings_content, cx)
3345 })
3346 })
3347 .ok();
3348 }
3349 }
3350 })
3351 .detach();
3352}
3353
3354fn tool_names_for_completion(completion: &LanguageModelRequest) -> Vec<String> {
3355 completion
3356 .tools
3357 .iter()
3358 .map(|tool| tool.name.clone())
3359 .collect()
3360}
3361
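/// Registers a fake MCP context server named `name` that exposes `tools`. Returns a receiver
/// yielding each incoming tool call together with a sender for its response.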
3362fn setup_context_server(
3363 name: &'static str,
3364 tools: Vec<context_server::types::Tool>,
3365 context_server_store: &Entity<ContextServerStore>,
3366 cx: &mut TestAppContext,
3367) -> mpsc::UnboundedReceiver<(
3368 context_server::types::CallToolParams,
3369 oneshot::Sender<context_server::types::CallToolResponse>,
3370)> {
3371 cx.update(|cx| {
3372 let mut settings = ProjectSettings::get_global(cx).clone();
3373 settings.context_servers.insert(
3374 name.into(),
3375 project::project_settings::ContextServerSettings::Stdio {
3376 enabled: true,
3377 command: ContextServerCommand {
3378 path: "somebinary".into(),
3379 args: Vec::new(),
3380 env: None,
3381 timeout: None,
3382 },
3383 },
3384 );
3385 ProjectSettings::override_global(settings, cx);
3386 });
3387
3388 let (mcp_tool_calls_tx, mcp_tool_calls_rx) = mpsc::unbounded();
3389 let fake_transport = context_server::test::create_fake_transport(name, cx.executor())
3390 .on_request::<context_server::types::requests::Initialize, _>(move |_params| async move {
3391 context_server::types::InitializeResponse {
3392 protocol_version: context_server::types::ProtocolVersion(
3393 context_server::types::LATEST_PROTOCOL_VERSION.to_string(),
3394 ),
3395 server_info: context_server::types::Implementation {
3396 name: name.into(),
3397 version: "1.0.0".to_string(),
3398 },
3399 capabilities: context_server::types::ServerCapabilities {
3400 tools: Some(context_server::types::ToolsCapabilities {
3401 list_changed: Some(true),
3402 }),
3403 ..Default::default()
3404 },
3405 meta: None,
3406 }
3407 })
3408 .on_request::<context_server::types::requests::ListTools, _>(move |_params| {
3409 let tools = tools.clone();
3410 async move {
3411 context_server::types::ListToolsResponse {
3412 tools,
3413 next_cursor: None,
3414 meta: None,
3415 }
3416 }
3417 })
3418 .on_request::<context_server::types::requests::CallTool, _>(move |params| {
3419 let mcp_tool_calls_tx = mcp_tool_calls_tx.clone();
3420 async move {
3421 let (response_tx, response_rx) = oneshot::channel();
3422 mcp_tool_calls_tx
3423 .unbounded_send((params, response_tx))
3424 .unwrap();
3425 response_rx.await.unwrap()
3426 }
3427 });
3428 context_server_store.update(cx, |store, cx| {
3429 store.start_server(
3430 Arc::new(ContextServer::new(
3431 ContextServerId(name.into()),
3432 Arc::new(fake_transport),
3433 )),
3434 cx,
3435 );
3436 });
3437 cx.run_until_parked();
3438 mcp_tool_calls_rx
3439}
3440
3441#[gpui::test]
3442async fn test_tokens_before_message(cx: &mut TestAppContext) {
3443 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
3444 let fake_model = model.as_fake();
3445
3446 // First message
3447 let message_1_id = UserMessageId::new();
3448 thread
3449 .update(cx, |thread, cx| {
3450 thread.send(message_1_id.clone(), ["First message"], cx)
3451 })
3452 .unwrap();
3453 cx.run_until_parked();
3454
3455 // Before any response, tokens_before_message should return None for first message
3456 thread.read_with(cx, |thread, _| {
3457 assert_eq!(
3458 thread.tokens_before_message(&message_1_id),
3459 None,
3460 "First message should have no tokens before it"
3461 );
3462 });
3463
3464 // Complete first message with usage
3465 fake_model.send_last_completion_stream_text_chunk("Response 1");
3466 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
3467 language_model::TokenUsage {
3468 input_tokens: 100,
3469 output_tokens: 50,
3470 cache_creation_input_tokens: 0,
3471 cache_read_input_tokens: 0,
3472 },
3473 ));
3474 fake_model.end_last_completion_stream();
3475 cx.run_until_parked();
3476
3477 // First message still has no tokens before it
3478 thread.read_with(cx, |thread, _| {
3479 assert_eq!(
3480 thread.tokens_before_message(&message_1_id),
3481 None,
3482 "First message should still have no tokens before it after response"
3483 );
3484 });
3485
3486 // Second message
3487 let message_2_id = UserMessageId::new();
3488 thread
3489 .update(cx, |thread, cx| {
3490 thread.send(message_2_id.clone(), ["Second message"], cx)
3491 })
3492 .unwrap();
3493 cx.run_until_parked();
3494
3495 // Second message should have first message's input tokens before it
3496 thread.read_with(cx, |thread, _| {
3497 assert_eq!(
3498 thread.tokens_before_message(&message_2_id),
3499 Some(100),
3500 "Second message should have 100 tokens before it (from first request)"
3501 );
3502 });
3503
3504 // Complete second message
3505 fake_model.send_last_completion_stream_text_chunk("Response 2");
3506 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
3507 language_model::TokenUsage {
3508 input_tokens: 250, // Total for this request (includes previous context)
3509 output_tokens: 75,
3510 cache_creation_input_tokens: 0,
3511 cache_read_input_tokens: 0,
3512 },
3513 ));
3514 fake_model.end_last_completion_stream();
3515 cx.run_until_parked();
3516
3517 // Third message
3518 let message_3_id = UserMessageId::new();
3519 thread
3520 .update(cx, |thread, cx| {
3521 thread.send(message_3_id.clone(), ["Third message"], cx)
3522 })
3523 .unwrap();
3524 cx.run_until_parked();
3525
3526 // Third message should have second message's input tokens (250) before it
3527 thread.read_with(cx, |thread, _| {
3528 assert_eq!(
3529 thread.tokens_before_message(&message_3_id),
3530 Some(250),
3531 "Third message should have 250 tokens before it (from second request)"
3532 );
3533 // Second message should still have 100
3534 assert_eq!(
3535 thread.tokens_before_message(&message_2_id),
3536 Some(100),
3537 "Second message should still have 100 tokens before it"
3538 );
3539 // First message still has none
3540 assert_eq!(
3541 thread.tokens_before_message(&message_1_id),
3542 None,
3543 "First message should still have no tokens before it"
3544 );
3545 });
3546}
3547
3548#[gpui::test]
3549async fn test_tokens_before_message_after_truncate(cx: &mut TestAppContext) {
3550 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
3551 let fake_model = model.as_fake();
3552
3553 // Set up three messages with responses
3554 let message_1_id = UserMessageId::new();
3555 thread
3556 .update(cx, |thread, cx| {
3557 thread.send(message_1_id.clone(), ["Message 1"], cx)
3558 })
3559 .unwrap();
3560 cx.run_until_parked();
3561 fake_model.send_last_completion_stream_text_chunk("Response 1");
3562 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
3563 language_model::TokenUsage {
3564 input_tokens: 100,
3565 output_tokens: 50,
3566 cache_creation_input_tokens: 0,
3567 cache_read_input_tokens: 0,
3568 },
3569 ));
3570 fake_model.end_last_completion_stream();
3571 cx.run_until_parked();
3572
3573 let message_2_id = UserMessageId::new();
3574 thread
3575 .update(cx, |thread, cx| {
3576 thread.send(message_2_id.clone(), ["Message 2"], cx)
3577 })
3578 .unwrap();
3579 cx.run_until_parked();
3580 fake_model.send_last_completion_stream_text_chunk("Response 2");
3581 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
3582 language_model::TokenUsage {
3583 input_tokens: 250,
3584 output_tokens: 75,
3585 cache_creation_input_tokens: 0,
3586 cache_read_input_tokens: 0,
3587 },
3588 ));
3589 fake_model.end_last_completion_stream();
3590 cx.run_until_parked();
3591
3592 // Verify initial state
3593 thread.read_with(cx, |thread, _| {
3594 assert_eq!(thread.tokens_before_message(&message_2_id), Some(100));
3595 });
3596
3597 // Truncate at message 2 (removes message 2 and everything after)
3598 thread
3599 .update(cx, |thread, cx| thread.truncate(message_2_id.clone(), cx))
3600 .unwrap();
3601 cx.run_until_parked();
3602
3603 // After truncation, message_2_id no longer exists, so lookup should return None
3604 thread.read_with(cx, |thread, _| {
3605 assert_eq!(
3606 thread.tokens_before_message(&message_2_id),
3607 None,
3608 "After truncation, message 2 no longer exists"
3609 );
3610 // Message 1 still exists but has no tokens before it
3611 assert_eq!(
3612 thread.tokens_before_message(&message_1_id),
3613 None,
3614 "First message still has no tokens before it"
3615 );
3616 });
3617}
3618
3619#[gpui::test]
3620async fn test_terminal_tool_permission_rules(cx: &mut TestAppContext) {
3621 init_test(cx);
3622
3623 let fs = FakeFs::new(cx.executor());
3624 fs.insert_tree("/root", json!({})).await;
3625 let project = Project::test(fs, ["/root".as_ref()], cx).await;
3626
3627 // Test 1: Deny rule blocks command
3628 {
3629 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
3630 let environment = Rc::new(FakeThreadEnvironment {
3631 handle: handle.clone(),
3632 });
3633
3634 cx.update(|cx| {
3635 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
3636 settings.tool_permissions.tools.insert(
3637 "terminal".into(),
3638 agent_settings::ToolRules {
3639 default_mode: settings::ToolPermissionMode::Confirm,
3640 always_allow: vec![],
3641 always_deny: vec![
3642 agent_settings::CompiledRegex::new(r"rm\s+-rf", false).unwrap(),
3643 ],
3644 always_confirm: vec![],
3645 invalid_patterns: vec![],
3646 },
3647 );
3648 agent_settings::AgentSettings::override_global(settings, cx);
3649 });
3650
3651 #[allow(clippy::arc_with_non_send_sync)]
3652 let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
3653 let (event_stream, _rx) = crate::ToolCallEventStream::test();
3654
3655 let task = cx.update(|cx| {
3656 tool.run(
3657 crate::TerminalToolInput {
3658 command: "rm -rf /".to_string(),
3659 cd: ".".to_string(),
3660 timeout_ms: None,
3661 },
3662 event_stream,
3663 cx,
3664 )
3665 });
3666
3667 let result = task.await;
3668 assert!(
3669 result.is_err(),
3670 "expected command to be blocked by deny rule"
3671 );
3672 assert!(
3673 result.unwrap_err().to_string().contains("blocked"),
3674 "error should mention the command was blocked"
3675 );
3676 }
3677
3678 // Test 2: Allow rule skips confirmation (and overrides default_mode: Deny)
3679 {
3680 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_with_immediate_exit(cx, 0)));
3681 let environment = Rc::new(FakeThreadEnvironment {
3682 handle: handle.clone(),
3683 });
3684
3685 cx.update(|cx| {
3686 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
3687 settings.always_allow_tool_actions = false;
3688 settings.tool_permissions.tools.insert(
3689 "terminal".into(),
3690 agent_settings::ToolRules {
3691 default_mode: settings::ToolPermissionMode::Deny,
3692 always_allow: vec![
3693 agent_settings::CompiledRegex::new(r"^echo\s", false).unwrap(),
3694 ],
3695 always_deny: vec![],
3696 always_confirm: vec![],
3697 invalid_patterns: vec![],
3698 },
3699 );
3700 agent_settings::AgentSettings::override_global(settings, cx);
3701 });
3702
3703 #[allow(clippy::arc_with_non_send_sync)]
3704 let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
3705 let (event_stream, mut rx) = crate::ToolCallEventStream::test();
3706
3707 let task = cx.update(|cx| {
3708 tool.run(
3709 crate::TerminalToolInput {
3710 command: "echo hello".to_string(),
3711 cd: ".".to_string(),
3712 timeout_ms: None,
3713 },
3714 event_stream,
3715 cx,
3716 )
3717 });
3718
3719 let update = rx.expect_update_fields().await;
3720 assert!(
3721 update.content.iter().any(|blocks| {
3722 blocks
3723 .iter()
3724 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
3725 }),
3726 "expected terminal content (allow rule should skip confirmation and override default deny)"
3727 );
3728
3729 let result = task.await;
3730 assert!(
3731 result.is_ok(),
3732 "expected command to succeed without confirmation"
3733 );
3734 }
3735
3736 // Test 3: Confirm rule forces confirmation even with always_allow_tool_actions=true
3737 {
3738 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_with_immediate_exit(cx, 0)));
3739 let environment = Rc::new(FakeThreadEnvironment {
3740 handle: handle.clone(),
3741 });
3742
3743 cx.update(|cx| {
3744 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
3745 settings.always_allow_tool_actions = true;
3746 settings.tool_permissions.tools.insert(
3747 "terminal".into(),
3748 agent_settings::ToolRules {
3749 default_mode: settings::ToolPermissionMode::Allow,
3750 always_allow: vec![],
3751 always_deny: vec![],
3752 always_confirm: vec![
3753 agent_settings::CompiledRegex::new(r"sudo", false).unwrap(),
3754 ],
3755 invalid_patterns: vec![],
3756 },
3757 );
3758 agent_settings::AgentSettings::override_global(settings, cx);
3759 });
3760
3761 #[allow(clippy::arc_with_non_send_sync)]
3762 let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
3763 let (event_stream, mut rx) = crate::ToolCallEventStream::test();
3764
3765 let _task = cx.update(|cx| {
3766 tool.run(
3767 crate::TerminalToolInput {
3768 command: "sudo rm file".to_string(),
3769 cd: ".".to_string(),
3770 timeout_ms: None,
3771 },
3772 event_stream,
3773 cx,
3774 )
3775 });
3776
3777 let auth = rx.expect_authorization().await;
3778 assert!(
3779 auth.tool_call.fields.title.is_some(),
3780 "expected authorization request for sudo command despite always_allow_tool_actions=true"
3781 );
3782 }
3783
3784 // Test 4: default_mode: Deny blocks commands when no pattern matches
3785 {
3786 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
3787 let environment = Rc::new(FakeThreadEnvironment {
3788 handle: handle.clone(),
3789 });
3790
3791 cx.update(|cx| {
3792 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
3793 settings.always_allow_tool_actions = true;
3794 settings.tool_permissions.tools.insert(
3795 "terminal".into(),
3796 agent_settings::ToolRules {
3797 default_mode: settings::ToolPermissionMode::Deny,
3798 always_allow: vec![],
3799 always_deny: vec![],
3800 always_confirm: vec![],
3801 invalid_patterns: vec![],
3802 },
3803 );
3804 agent_settings::AgentSettings::override_global(settings, cx);
3805 });
3806
3807 #[allow(clippy::arc_with_non_send_sync)]
3808 let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
3809 let (event_stream, _rx) = crate::ToolCallEventStream::test();
3810
3811 let task = cx.update(|cx| {
3812 tool.run(
3813 crate::TerminalToolInput {
3814 command: "echo hello".to_string(),
3815 cd: ".".to_string(),
3816 timeout_ms: None,
3817 },
3818 event_stream,
3819 cx,
3820 )
3821 });
3822
3823 let result = task.await;
3824 assert!(
3825 result.is_err(),
3826 "expected command to be blocked by default_mode: Deny"
3827 );
3828 assert!(
3829 result.unwrap_err().to_string().contains("disabled"),
3830 "error should mention the tool is disabled"
3831 );
3832 }
3833}