1use super::*;
2use acp_thread::{AgentConnection, AgentModelGroupName, AgentModelList, UserMessageId};
3use agent_client_protocol::{self as acp};
4use agent_settings::AgentProfileId;
5use anyhow::Result;
6use client::{Client, UserStore};
7use cloud_llm_client::CompletionIntent;
8use collections::IndexMap;
9use context_server::{ContextServer, ContextServerCommand, ContextServerId};
10use fs::{FakeFs, Fs};
11use futures::{
12 FutureExt as _, StreamExt,
13 channel::{
14 mpsc::{self, UnboundedReceiver},
15 oneshot,
16 },
17 future::{Fuse, Shared},
18};
19use gpui::{
20 App, AppContext, AsyncApp, Entity, Task, TestAppContext, UpdateGlobal,
21 http_client::FakeHttpClient,
22};
23use indoc::indoc;
24use language_model::{
25 LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent, LanguageModelId,
26 LanguageModelProviderName, LanguageModelRegistry, LanguageModelRequest,
27 LanguageModelRequestMessage, LanguageModelToolResult, LanguageModelToolSchemaFormat,
28 LanguageModelToolUse, MessageContent, Role, StopReason, fake_provider::FakeLanguageModel,
29};
30use pretty_assertions::assert_eq;
31use project::{
32 Project, context_server_store::ContextServerStore, project_settings::ProjectSettings,
33};
34use prompt_store::ProjectContext;
35use reqwest_client::ReqwestClient;
36use schemars::JsonSchema;
37use serde::{Deserialize, Serialize};
38use serde_json::json;
39use settings::{Settings, SettingsStore};
40use std::{
41 path::Path,
42 pin::Pin,
43 rc::Rc,
44 sync::{
45 Arc,
46 atomic::{AtomicBool, Ordering},
47 },
48 time::Duration,
49};
50use util::path;
51
52mod test_tools;
53use test_tools::*;
54
55fn init_test(cx: &mut TestAppContext) {
56 cx.update(|cx| {
57 let settings_store = SettingsStore::test(cx);
58 cx.set_global(settings_store);
59 });
60}
61
/// Test double for `crate::TerminalHandle`, letting tests observe whether the
/// terminal was killed and control when it reports an exit.
struct FakeTerminalHandle {
    // Set to true when `kill()` is called.
    killed: Arc<AtomicBool>,
    // Value reported by `was_stopped_by_user()`; toggled via `set_stopped_by_user`.
    stopped_by_user: Arc<AtomicBool>,
    // Sender used by `signal_exit()` to resolve `wait_for_exit`; `RefCell` so it
    // can be taken through a shared reference.
    exit_sender: std::cell::RefCell<Option<futures::channel::oneshot::Sender<()>>>,
    // Shared task that resolves once the terminal "exits".
    wait_for_exit: Shared<Task<acp::TerminalExitStatus>>,
    // Canned output returned by `current_output()`.
    output: acp::TerminalOutputResponse,
    // Fixed terminal id returned by `id()`.
    id: acp::TerminalId,
}
70
71impl FakeTerminalHandle {
72 fn new_never_exits(cx: &mut App) -> Self {
73 let killed = Arc::new(AtomicBool::new(false));
74 let stopped_by_user = Arc::new(AtomicBool::new(false));
75
76 let (exit_sender, exit_receiver) = futures::channel::oneshot::channel();
77
78 let wait_for_exit = cx
79 .spawn(async move |_cx| {
80 // Wait for the exit signal (sent when kill() is called)
81 let _ = exit_receiver.await;
82 acp::TerminalExitStatus::new()
83 })
84 .shared();
85
86 Self {
87 killed,
88 stopped_by_user,
89 exit_sender: std::cell::RefCell::new(Some(exit_sender)),
90 wait_for_exit,
91 output: acp::TerminalOutputResponse::new("partial output".to_string(), false),
92 id: acp::TerminalId::new("fake_terminal".to_string()),
93 }
94 }
95
96 fn new_with_immediate_exit(cx: &mut App, exit_code: u32) -> Self {
97 let killed = Arc::new(AtomicBool::new(false));
98 let stopped_by_user = Arc::new(AtomicBool::new(false));
99 let (exit_sender, _exit_receiver) = futures::channel::oneshot::channel();
100
101 let wait_for_exit = cx
102 .spawn(async move |_cx| acp::TerminalExitStatus::new().exit_code(exit_code))
103 .shared();
104
105 Self {
106 killed,
107 stopped_by_user,
108 exit_sender: std::cell::RefCell::new(Some(exit_sender)),
109 wait_for_exit,
110 output: acp::TerminalOutputResponse::new("command output".to_string(), false),
111 id: acp::TerminalId::new("fake_terminal".to_string()),
112 }
113 }
114
115 fn was_killed(&self) -> bool {
116 self.killed.load(Ordering::SeqCst)
117 }
118
119 fn set_stopped_by_user(&self, stopped: bool) {
120 self.stopped_by_user.store(stopped, Ordering::SeqCst);
121 }
122
123 fn signal_exit(&self) {
124 if let Some(sender) = self.exit_sender.borrow_mut().take() {
125 let _ = sender.send(());
126 }
127 }
128}
129
// Trait implementation consumed by the terminal tool under test; every method
// delegates to the canned state stored on `FakeTerminalHandle`.
impl crate::TerminalHandle for FakeTerminalHandle {
    fn id(&self, _cx: &AsyncApp) -> Result<acp::TerminalId> {
        Ok(self.id.clone())
    }

    // Always returns the canned output, regardless of what "command" ran.
    fn current_output(&self, _cx: &AsyncApp) -> Result<acp::TerminalOutputResponse> {
        Ok(self.output.clone())
    }

    fn wait_for_exit(&self, _cx: &AsyncApp) -> Result<Shared<Task<acp::TerminalExitStatus>>> {
        Ok(self.wait_for_exit.clone())
    }

    // Records the kill and resolves `wait_for_exit` so awaiting callers unblock.
    fn kill(&self, _cx: &AsyncApp) -> Result<()> {
        self.killed.store(true, Ordering::SeqCst);
        self.signal_exit();
        Ok(())
    }

    fn was_stopped_by_user(&self, _cx: &AsyncApp) -> Result<bool> {
        Ok(self.stopped_by_user.load(Ordering::SeqCst))
    }
}
153
/// Environment that hands out a single, pre-built terminal handle for every
/// `create_terminal` call.
struct FakeThreadEnvironment {
    // The shared handle returned to the tool under test.
    handle: Rc<FakeTerminalHandle>,
}
157
158impl crate::ThreadEnvironment for FakeThreadEnvironment {
159 fn create_terminal(
160 &self,
161 _command: String,
162 _cwd: Option<std::path::PathBuf>,
163 _output_byte_limit: Option<u64>,
164 _cx: &mut AsyncApp,
165 ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
166 Task::ready(Ok(self.handle.clone() as Rc<dyn crate::TerminalHandle>))
167 }
168}
169
/// Environment that creates multiple independent terminal handles for testing concurrent terminals.
struct MultiTerminalEnvironment {
    // Every handle created so far, in creation order; `RefCell` because
    // `create_terminal` takes `&self`.
    handles: std::cell::RefCell<Vec<Rc<FakeTerminalHandle>>>,
}
174
175impl MultiTerminalEnvironment {
176 fn new() -> Self {
177 Self {
178 handles: std::cell::RefCell::new(Vec::new()),
179 }
180 }
181
182 fn handles(&self) -> Vec<Rc<FakeTerminalHandle>> {
183 self.handles.borrow().clone()
184 }
185}
186
187impl crate::ThreadEnvironment for MultiTerminalEnvironment {
188 fn create_terminal(
189 &self,
190 _command: String,
191 _cwd: Option<std::path::PathBuf>,
192 _output_byte_limit: Option<u64>,
193 cx: &mut AsyncApp,
194 ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
195 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
196 self.handles.borrow_mut().push(handle.clone());
197 Task::ready(Ok(handle as Rc<dyn crate::TerminalHandle>))
198 }
199}
200
201fn always_allow_tools(cx: &mut TestAppContext) {
202 cx.update(|cx| {
203 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
204 settings.always_allow_tool_actions = true;
205 agent_settings::AgentSettings::override_global(settings, cx);
206 });
207}
208
209#[gpui::test]
210async fn test_echo(cx: &mut TestAppContext) {
211 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
212 let fake_model = model.as_fake();
213
214 let events = thread
215 .update(cx, |thread, cx| {
216 thread.send(UserMessageId::new(), ["Testing: Reply with 'Hello'"], cx)
217 })
218 .unwrap();
219 cx.run_until_parked();
220 fake_model.send_last_completion_stream_text_chunk("Hello");
221 fake_model
222 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
223 fake_model.end_last_completion_stream();
224
225 let events = events.collect().await;
226 thread.update(cx, |thread, _cx| {
227 assert_eq!(
228 thread.last_message().unwrap().to_markdown(),
229 indoc! {"
230 ## Assistant
231
232 Hello
233 "}
234 )
235 });
236 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
237}
238
/// A terminal command with a timeout must have its terminal handle killed once
/// the timeout elapses, and the tool result must still include the partial output.
#[gpui::test]
async fn test_terminal_tool_timeout_kills_handle(cx: &mut TestAppContext) {
    init_test(cx);
    always_allow_tools(cx);

    let fs = FakeFs::new(cx.executor());
    let project = Project::test(fs, [], cx).await;

    // A terminal that never exits on its own, so only the timeout can end it.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    #[allow(clippy::arc_with_non_send_sync)]
    let tool = Arc::new(crate::TerminalTool::new(project, environment));
    let (event_stream, mut rx) = crate::ToolCallEventStream::test();

    // Run with a tiny timeout so the kill path triggers quickly.
    let task = cx.update(|cx| {
        tool.run(
            crate::TerminalToolInput {
                command: "sleep 1000".to_string(),
                cd: ".".to_string(),
                timeout_ms: Some(5),
            },
            event_stream,
            cx,
        )
    });

    // The tool should first surface the terminal as tool-call content.
    let update = rx.expect_update_fields().await;
    assert!(
        update.content.iter().any(|blocks| {
            blocks
                .iter()
                .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
        }),
        "expected tool call update to include terminal content"
    );

    let mut task_future: Pin<Box<Fuse<Task<Result<String>>>>> = Box::pin(task.fuse());

    // Poll the task to completion, pumping the test executor between polls.
    // NOTE(review): the deadline uses wall-clock `Instant` while the executor is
    // the test executor — presumably fine because each poll is cheap; confirm.
    let deadline = std::time::Instant::now() + Duration::from_millis(500);
    loop {
        if let Some(result) = task_future.as_mut().now_or_never() {
            let result = result.expect("terminal tool task should complete");

            assert!(
                handle.was_killed(),
                "expected terminal handle to be killed on timeout"
            );
            assert!(
                result.contains("partial output"),
                "expected result to include terminal output, got: {result}"
            );
            return;
        }

        if std::time::Instant::now() >= deadline {
            panic!("timed out waiting for terminal tool task to complete");
        }

        cx.run_until_parked();
        cx.background_executor.timer(Duration::from_millis(1)).await;
    }
}
304
/// Without a timeout, the terminal tool must leave the terminal running:
/// the handle must never be killed while the command is still in flight.
#[gpui::test]
#[ignore]
async fn test_terminal_tool_without_timeout_does_not_kill_handle(cx: &mut TestAppContext) {
    init_test(cx);
    always_allow_tools(cx);

    let fs = FakeFs::new(cx.executor());
    let project = Project::test(fs, [], cx).await;

    // A terminal that never exits; without a timeout it must stay alive.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    #[allow(clippy::arc_with_non_send_sync)]
    let tool = Arc::new(crate::TerminalTool::new(project, environment));
    let (event_stream, mut rx) = crate::ToolCallEventStream::test();

    // timeout_ms: None — the task is intentionally left running (never awaited).
    let _task = cx.update(|cx| {
        tool.run(
            crate::TerminalToolInput {
                command: "sleep 1000".to_string(),
                cd: ".".to_string(),
                timeout_ms: None,
            },
            event_stream,
            cx,
        )
    });

    let update = rx.expect_update_fields().await;
    assert!(
        update.content.iter().any(|blocks| {
            blocks
                .iter()
                .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
        }),
        "expected tool call update to include terminal content"
    );

    // Give the tool a moment to (incorrectly) kill the terminal if it were
    // going to. NOTE(review): this is a real-time sleep — presumably the reason
    // the test is #[ignore]d by default; confirm.
    smol::Timer::after(Duration::from_millis(25)).await;

    assert!(
        !handle.was_killed(),
        "did not expect terminal handle to be killed without a timeout"
    );
}
352
353#[gpui::test]
354async fn test_thinking(cx: &mut TestAppContext) {
355 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
356 let fake_model = model.as_fake();
357
358 let events = thread
359 .update(cx, |thread, cx| {
360 thread.send(
361 UserMessageId::new(),
362 [indoc! {"
363 Testing:
364
365 Generate a thinking step where you just think the word 'Think',
366 and have your final answer be 'Hello'
367 "}],
368 cx,
369 )
370 })
371 .unwrap();
372 cx.run_until_parked();
373 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Thinking {
374 text: "Think".to_string(),
375 signature: None,
376 });
377 fake_model.send_last_completion_stream_text_chunk("Hello");
378 fake_model
379 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
380 fake_model.end_last_completion_stream();
381
382 let events = events.collect().await;
383 thread.update(cx, |thread, _cx| {
384 assert_eq!(
385 thread.last_message().unwrap().to_markdown(),
386 indoc! {"
387 ## Assistant
388
389 <think>Think</think>
390 Hello
391 "}
392 )
393 });
394 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
395}
396
397#[gpui::test]
398async fn test_system_prompt(cx: &mut TestAppContext) {
399 let ThreadTest {
400 model,
401 thread,
402 project_context,
403 ..
404 } = setup(cx, TestModel::Fake).await;
405 let fake_model = model.as_fake();
406
407 project_context.update(cx, |project_context, _cx| {
408 project_context.shell = "test-shell".into()
409 });
410 thread.update(cx, |thread, _| thread.add_tool(EchoTool));
411 thread
412 .update(cx, |thread, cx| {
413 thread.send(UserMessageId::new(), ["abc"], cx)
414 })
415 .unwrap();
416 cx.run_until_parked();
417 let mut pending_completions = fake_model.pending_completions();
418 assert_eq!(
419 pending_completions.len(),
420 1,
421 "unexpected pending completions: {:?}",
422 pending_completions
423 );
424
425 let pending_completion = pending_completions.pop().unwrap();
426 assert_eq!(pending_completion.messages[0].role, Role::System);
427
428 let system_message = &pending_completion.messages[0];
429 let system_prompt = system_message.content[0].to_str().unwrap();
430 assert!(
431 system_prompt.contains("test-shell"),
432 "unexpected system message: {:?}",
433 system_message
434 );
435 assert!(
436 system_prompt.contains("## Fixing Diagnostics"),
437 "unexpected system message: {:?}",
438 system_message
439 );
440}
441
442#[gpui::test]
443async fn test_system_prompt_without_tools(cx: &mut TestAppContext) {
444 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
445 let fake_model = model.as_fake();
446
447 thread
448 .update(cx, |thread, cx| {
449 thread.send(UserMessageId::new(), ["abc"], cx)
450 })
451 .unwrap();
452 cx.run_until_parked();
453 let mut pending_completions = fake_model.pending_completions();
454 assert_eq!(
455 pending_completions.len(),
456 1,
457 "unexpected pending completions: {:?}",
458 pending_completions
459 );
460
461 let pending_completion = pending_completions.pop().unwrap();
462 assert_eq!(pending_completion.messages[0].role, Role::System);
463
464 let system_message = &pending_completion.messages[0];
465 let system_prompt = system_message.content[0].to_str().unwrap();
466 assert!(
467 !system_prompt.contains("## Tool Use"),
468 "unexpected system message: {:?}",
469 system_message
470 );
471 assert!(
472 !system_prompt.contains("## Fixing Diagnostics"),
473 "unexpected system message: {:?}",
474 system_message
475 );
476}
477
/// Only the most recent request message carries `cache: true`; earlier messages
/// have their cache flag cleared as the conversation grows, including the case
/// where the latest message is a tool result.
#[gpui::test]
async fn test_prompt_caching(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Send initial user message and verify it's cached
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // messages[0] is the system prompt; compare everything after it.
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![LanguageModelRequestMessage {
            role: Role::User,
            content: vec!["Message 1".into()],
            cache: true,
            reasoning_details: None,
        }]
    );
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
        "Response to Message 1".into(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Send another user message and verify only the latest is cached
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 2".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
        "Response to Message 2".into(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Simulate a tool call and verify that the latest tool result is cached
    thread.update(cx, |thread, _| thread.add_tool(EchoTool));
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Use the echo tool"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use = LanguageModelToolUse {
        id: "tool_1".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // After the tool runs, the follow-up request ends with the tool result,
    // which is now the only cached message.
    let completion = fake_model.pending_completions().pop().unwrap();
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "test".into(),
        output: Some("test".into()),
    };
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 2".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 2".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Use the echo tool".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
}
623
/// End-to-end (real model) check that tool calls complete correctly whether the
/// tool finishes before or after the model stops streaming.
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_basic_tool_calls(cx: &mut TestAppContext) {
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // Test a tool call that's likely to complete *before* streaming stops.
    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(
                UserMessageId::new(),
                ["Now test the echo tool with 'Hello'. Does it work? Say 'Yes' or 'No'."],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);

    // Test a tool calls that's likely to complete *after* streaming stops.
    let events = thread
        .update(cx, |thread, cx| {
            thread.remove_tool(&EchoTool::name());
            thread.add_tool(DelayTool);
            thread.send(
                UserMessageId::new(),
                [
                    "Now call the delay tool with 200ms.",
                    "When the timer goes off, then you echo the output of the tool.",
                ],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
    // The final assistant message should echo the delay tool's "Ding" output.
    thread.update(cx, |thread, _cx| {
        assert!(
            thread
                .last_message()
                .unwrap()
                .as_agent_message()
                .unwrap()
                .content
                .iter()
                .any(|content| {
                    if let AgentMessageContent::Text(text) = content {
                        text.contains("Ding")
                    } else {
                        false
                    }
                }),
            "{}",
            thread.to_markdown()
        );
    });
}
683
/// End-to-end (real model) check that tool-use input streams incrementally:
/// at least one event must observe a partially-streamed input before the
/// complete input arrives.
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_streaming_tool_calls(cx: &mut TestAppContext) {
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // Test a tool call that's likely to complete *before* streaming stops.
    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(WordListTool);
            thread.send(UserMessageId::new(), ["Test the word_list tool."], cx)
        })
        .unwrap();

    let mut saw_partial_tool_use = false;
    while let Some(event) = events.next().await {
        if let Ok(ThreadEvent::ToolCall(tool_call)) = event {
            thread.update(cx, |thread, _cx| {
                // Look for a tool use in the thread's last message
                let message = thread.last_message().unwrap();
                let agent_message = message.as_agent_message().unwrap();
                let last_content = agent_message.content.last().unwrap();
                if let AgentMessageContent::ToolUse(last_tool_use) = last_content {
                    assert_eq!(last_tool_use.name.as_ref(), "word_list");
                    if tool_call.status == acp::ToolCallStatus::Pending {
                        // While pending, the input may still be incomplete; the
                        // "g" key streams last, so its absence marks a partial.
                        if !last_tool_use.is_input_complete
                            && last_tool_use.input.get("g").is_none()
                        {
                            saw_partial_tool_use = true;
                        }
                    } else {
                        last_tool_use
                            .input
                            .get("a")
                            .expect("'a' has streamed because input is now complete");
                        last_tool_use
                            .input
                            .get("g")
                            .expect("'g' has streamed because input is now complete");
                    }
                } else {
                    panic!("last content should be a tool use");
                }
            });
        }
    }

    assert!(
        saw_partial_tool_use,
        "should see at least one partially streamed tool use in the history"
    );
}
735
736#[gpui::test]
737async fn test_tool_authorization(cx: &mut TestAppContext) {
738 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
739 let fake_model = model.as_fake();
740
741 let mut events = thread
742 .update(cx, |thread, cx| {
743 thread.add_tool(ToolRequiringPermission);
744 thread.send(UserMessageId::new(), ["abc"], cx)
745 })
746 .unwrap();
747 cx.run_until_parked();
748 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
749 LanguageModelToolUse {
750 id: "tool_id_1".into(),
751 name: ToolRequiringPermission::name().into(),
752 raw_input: "{}".into(),
753 input: json!({}),
754 is_input_complete: true,
755 thought_signature: None,
756 },
757 ));
758 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
759 LanguageModelToolUse {
760 id: "tool_id_2".into(),
761 name: ToolRequiringPermission::name().into(),
762 raw_input: "{}".into(),
763 input: json!({}),
764 is_input_complete: true,
765 thought_signature: None,
766 },
767 ));
768 fake_model.end_last_completion_stream();
769 let tool_call_auth_1 = next_tool_call_authorization(&mut events).await;
770 let tool_call_auth_2 = next_tool_call_authorization(&mut events).await;
771
772 // Approve the first
773 tool_call_auth_1
774 .response
775 .send(tool_call_auth_1.options[1].option_id.clone())
776 .unwrap();
777 cx.run_until_parked();
778
779 // Reject the second
780 tool_call_auth_2
781 .response
782 .send(tool_call_auth_1.options[2].option_id.clone())
783 .unwrap();
784 cx.run_until_parked();
785
786 let completion = fake_model.pending_completions().pop().unwrap();
787 let message = completion.messages.last().unwrap();
788 assert_eq!(
789 message.content,
790 vec![
791 language_model::MessageContent::ToolResult(LanguageModelToolResult {
792 tool_use_id: tool_call_auth_1.tool_call.tool_call_id.0.to_string().into(),
793 tool_name: ToolRequiringPermission::name().into(),
794 is_error: false,
795 content: "Allowed".into(),
796 output: Some("Allowed".into())
797 }),
798 language_model::MessageContent::ToolResult(LanguageModelToolResult {
799 tool_use_id: tool_call_auth_2.tool_call.tool_call_id.0.to_string().into(),
800 tool_name: ToolRequiringPermission::name().into(),
801 is_error: true,
802 content: "Permission to run tool denied by user".into(),
803 output: Some("Permission to run tool denied by user".into())
804 })
805 ]
806 );
807
808 // Simulate yet another tool call.
809 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
810 LanguageModelToolUse {
811 id: "tool_id_3".into(),
812 name: ToolRequiringPermission::name().into(),
813 raw_input: "{}".into(),
814 input: json!({}),
815 is_input_complete: true,
816 thought_signature: None,
817 },
818 ));
819 fake_model.end_last_completion_stream();
820
821 // Respond by always allowing tools.
822 let tool_call_auth_3 = next_tool_call_authorization(&mut events).await;
823 tool_call_auth_3
824 .response
825 .send(tool_call_auth_3.options[0].option_id.clone())
826 .unwrap();
827 cx.run_until_parked();
828 let completion = fake_model.pending_completions().pop().unwrap();
829 let message = completion.messages.last().unwrap();
830 assert_eq!(
831 message.content,
832 vec![language_model::MessageContent::ToolResult(
833 LanguageModelToolResult {
834 tool_use_id: tool_call_auth_3.tool_call.tool_call_id.0.to_string().into(),
835 tool_name: ToolRequiringPermission::name().into(),
836 is_error: false,
837 content: "Allowed".into(),
838 output: Some("Allowed".into())
839 }
840 )]
841 );
842
843 // Simulate a final tool call, ensuring we don't trigger authorization.
844 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
845 LanguageModelToolUse {
846 id: "tool_id_4".into(),
847 name: ToolRequiringPermission::name().into(),
848 raw_input: "{}".into(),
849 input: json!({}),
850 is_input_complete: true,
851 thought_signature: None,
852 },
853 ));
854 fake_model.end_last_completion_stream();
855 cx.run_until_parked();
856 let completion = fake_model.pending_completions().pop().unwrap();
857 let message = completion.messages.last().unwrap();
858 assert_eq!(
859 message.content,
860 vec![language_model::MessageContent::ToolResult(
861 LanguageModelToolResult {
862 tool_use_id: "tool_id_4".into(),
863 tool_name: ToolRequiringPermission::name().into(),
864 is_error: false,
865 content: "Allowed".into(),
866 output: Some("Allowed".into())
867 }
868 )]
869 );
870}
871
872#[gpui::test]
873async fn test_tool_hallucination(cx: &mut TestAppContext) {
874 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
875 let fake_model = model.as_fake();
876
877 let mut events = thread
878 .update(cx, |thread, cx| {
879 thread.send(UserMessageId::new(), ["abc"], cx)
880 })
881 .unwrap();
882 cx.run_until_parked();
883 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
884 LanguageModelToolUse {
885 id: "tool_id_1".into(),
886 name: "nonexistent_tool".into(),
887 raw_input: "{}".into(),
888 input: json!({}),
889 is_input_complete: true,
890 thought_signature: None,
891 },
892 ));
893 fake_model.end_last_completion_stream();
894
895 let tool_call = expect_tool_call(&mut events).await;
896 assert_eq!(tool_call.title, "nonexistent_tool");
897 assert_eq!(tool_call.status, acp::ToolCallStatus::Pending);
898 let update = expect_tool_call_update_fields(&mut events).await;
899 assert_eq!(update.fields.status, Some(acp::ToolCallStatus::Failed));
900}
901
/// After hitting the tool-use limit, `resume` must replay the conversation
/// plus a "Continue where you left off" user message (which becomes the only
/// cached message) and then complete normally.
#[gpui::test]
async fn test_resume_after_tool_use_limit(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: EchoTool::name().into(),
        raw_input: "{}".into(),
        input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.end_last_completion_stream();

    cx.run_until_parked();
    // The follow-up request carries the tool result, cached (it is the latest).
    let completion = fake_model.pending_completions().pop().unwrap();
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_id_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "def".into(),
        output: Some("def".into()),
    };
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use.clone())],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result.clone())],
                cache: true,
                reasoning_details: None,
            },
        ]
    );

    // Simulate reaching tool use limit.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
    fake_model.end_last_completion_stream();
    // The stream must end with a ToolUseLimitReachedError.
    let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
    assert!(
        last_event
            .unwrap_err()
            .is::<language_model::ToolUseLimitReachedError>()
    );

    // Resuming replays history plus the continuation prompt (now cached).
    let events = thread.update(cx, |thread, cx| thread.resume(cx)).unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Continue where you left off".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );

    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text("Done".into()));
    fake_model.end_last_completion_stream();
    events.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message().unwrap().to_markdown(),
            indoc! {"
                ## Assistant

                Done
            "}
        )
    });
}
1016
/// After hitting the tool-use limit, sending a brand-new user message (instead
/// of resuming) must still include the prior tool use/result in the request,
/// with only the new message cached.
#[gpui::test]
async fn test_send_after_tool_use_limit(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: EchoTool::name().into(),
        raw_input: "{}".into(),
        input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
        is_input_complete: true,
        thought_signature: None,
    };
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_id_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "def".into(),
        output: Some("def".into()),
    };
    // Tool use immediately followed by the limit event in the same stream.
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
    fake_model.end_last_completion_stream();
    // The stream must end with a ToolUseLimitReachedError.
    let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
    assert!(
        last_event
            .unwrap_err()
            .is::<language_model::ToolUseLimitReachedError>()
    );

    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), vec!["ghi"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["ghi".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
}
1093
1094async fn expect_tool_call(events: &mut UnboundedReceiver<Result<ThreadEvent>>) -> acp::ToolCall {
1095 let event = events
1096 .next()
1097 .await
1098 .expect("no tool call authorization event received")
1099 .unwrap();
1100 match event {
1101 ThreadEvent::ToolCall(tool_call) => tool_call,
1102 event => {
1103 panic!("Unexpected event {event:?}");
1104 }
1105 }
1106}
1107
1108async fn expect_tool_call_update_fields(
1109 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1110) -> acp::ToolCallUpdate {
1111 let event = events
1112 .next()
1113 .await
1114 .expect("no tool call authorization event received")
1115 .unwrap();
1116 match event {
1117 ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(update)) => update,
1118 event => {
1119 panic!("Unexpected event {event:?}");
1120 }
1121 }
1122}
1123
1124async fn next_tool_call_authorization(
1125 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1126) -> ToolCallAuthorization {
1127 loop {
1128 let event = events
1129 .next()
1130 .await
1131 .expect("no tool call authorization event received")
1132 .unwrap();
1133 if let ThreadEvent::ToolCallAuthorization(tool_call_authorization) = event {
1134 let permission_kinds = tool_call_authorization
1135 .options
1136 .iter()
1137 .map(|o| o.kind)
1138 .collect::<Vec<_>>();
1139 assert_eq!(
1140 permission_kinds,
1141 vec![
1142 acp::PermissionOptionKind::AllowAlways,
1143 acp::PermissionOptionKind::AllowOnce,
1144 acp::PermissionOptionKind::RejectOnce,
1145 ]
1146 );
1147 return tool_call_authorization;
1148 }
1149 }
1150}
1151
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_concurrent_tool_calls(cx: &mut TestAppContext) {
    // E2E test (runs only with the "e2e" feature, against a real Sonnet 4
    // model): two DelayTool calls issued in one assistant message should run
    // concurrently and the turn should end normally once both complete.
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // Test concurrent tool calls with different delay times
    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(DelayTool);
            thread.send(
                UserMessageId::new(),
                [
                    "Call the delay tool twice in the same message.",
                    "Once with 100ms. Once with 300ms.",
                    "When both timers are complete, describe the outputs.",
                ],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;

    // The turn should end cleanly — no cancellation or refusal.
    let stop_reasons = stop_events(events);
    assert_eq!(stop_reasons, vec![acp::StopReason::EndTurn]);

    thread.update(cx, |thread, _cx| {
        // Collect all text content from the final agent message and check it
        // mentions the delay tool's output ("Ding").
        let last_message = thread.last_message().unwrap();
        let agent_message = last_message.as_agent_message().unwrap();
        let text = agent_message
            .content
            .iter()
            .filter_map(|content| {
                if let AgentMessageContent::Text(text) = content {
                    Some(text.as_str())
                } else {
                    None
                }
            })
            .collect::<String>();

        assert!(text.contains("Ding"));
    });
}
1196
#[gpui::test]
async fn test_profiles(cx: &mut TestAppContext) {
    // Verifies that the set of tools offered to the model follows the active
    // agent profile: switching profiles between sends changes which of the
    // registered tools appear in the completion request.
    let ThreadTest {
        model, thread, fs, ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Register three tools on the thread; profiles will filter among them.
    thread.update(cx, |thread, _cx| {
        thread.add_tool(DelayTool);
        thread.add_tool(EchoTool);
        thread.add_tool(InfiniteTool);
    });

    // Override profiles and wait for settings to be loaded.
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "profiles": {
                    "test-1": {
                        "name": "Test Profile 1",
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                        }
                    },
                    "test-2": {
                        "name": "Test Profile 2",
                        "tools": {
                            InfiniteTool::name(): true,
                        }
                    }
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;
    cx.run_until_parked();

    // Test that test-1 profile (default) has echo and delay tools
    thread
        .update(cx, |thread, cx| {
            thread.set_profile(AgentProfileId("test-1".into()), cx);
            thread.send(UserMessageId::new(), ["test"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let mut pending_completions = fake_model.pending_completions();
    assert_eq!(pending_completions.len(), 1);
    let completion = pending_completions.pop().unwrap();
    let tool_names: Vec<String> = completion
        .tools
        .iter()
        .map(|tool| tool.name.clone())
        .collect();
    assert_eq!(tool_names, vec![DelayTool::name(), EchoTool::name()]);
    fake_model.end_last_completion_stream();

    // Switch to test-2 profile, and verify that it has only the infinite tool.
    thread
        .update(cx, |thread, cx| {
            thread.set_profile(AgentProfileId("test-2".into()), cx);
            thread.send(UserMessageId::new(), ["test2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let mut pending_completions = fake_model.pending_completions();
    assert_eq!(pending_completions.len(), 1);
    let completion = pending_completions.pop().unwrap();
    let tool_names: Vec<String> = completion
        .tools
        .iter()
        .map(|tool| tool.name.clone())
        .collect();
    assert_eq!(tool_names, vec![InfiniteTool::name()]);
}
1276
#[gpui::test]
async fn test_mcp_tools(cx: &mut TestAppContext) {
    // Verifies the full round-trip of an MCP (context server) tool: the tool
    // is offered to the model, a tool-use call is routed to the server, and
    // the server's response is inserted as a tool result. Also verifies that
    // a name collision between an MCP tool and a native tool is resolved by
    // prefixing the MCP tool with its server name ("test_server_echo").
    let ThreadTest {
        model,
        thread,
        context_server_store,
        fs,
        ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Override profiles and wait for settings to be loaded.
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "always_allow_tool_actions": true,
                "profiles": {
                    "test": {
                        "name": "Test Profile",
                        "enable_all_context_servers": true,
                        "tools": {
                            EchoTool::name(): true,
                        }
                    },
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;
    cx.run_until_parked();
    thread.update(cx, |thread, cx| {
        thread.set_profile(AgentProfileId("test".into()), cx)
    });

    // Register a fake context server exposing a single "echo" tool whose
    // input schema mirrors the native EchoTool's.
    let mut mcp_tool_calls = setup_context_server(
        "test_server",
        vec![context_server::types::Tool {
            name: "echo".into(),
            description: None,
            input_schema: serde_json::to_value(EchoTool::input_schema(
                LanguageModelToolSchemaFormat::JsonSchema,
            ))
            .unwrap(),
            output_schema: None,
            annotations: None,
        }],
        &context_server_store,
        cx,
    );

    let events = thread.update(cx, |thread, cx| {
        thread.send(UserMessageId::new(), ["Hey"], cx).unwrap()
    });
    cx.run_until_parked();

    // Simulate the model calling the MCP tool.
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_1".into(),
            name: "echo".into(),
            raw_input: json!({"text": "test"}).to_string(),
            input: json!({"text": "test"}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // The tool call should have been forwarded to the context server.
    let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
    assert_eq!(tool_call_params.name, "echo");
    assert_eq!(tool_call_params.arguments, Some(json!({"text": "test"})));
    tool_call_response
        .send(context_server::types::CallToolResponse {
            content: vec![context_server::types::ToolResponseContent::Text {
                text: "test".into(),
            }],
            is_error: None,
            meta: None,
            structured_content: None,
        })
        .unwrap();
    cx.run_until_parked();

    // NOTE(review): this re-asserts the `completion` captured before the tool
    // result round-trip; presumably the intent was to assert the *new*
    // pending completion after the result was sent — verify.
    assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
    fake_model.send_last_completion_stream_text_chunk("Done!");
    fake_model.end_last_completion_stream();
    events.collect::<Vec<_>>().await;

    // Send again after adding the echo tool, ensuring the name collision is resolved.
    let events = thread.update(cx, |thread, cx| {
        thread.add_tool(EchoTool);
        thread.send(UserMessageId::new(), ["Go"], cx).unwrap()
    });
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        tool_names_for_completion(&completion),
        vec!["echo", "test_server_echo"]
    );
    // Call both the (renamed) MCP tool and the native tool in one turn.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_2".into(),
            name: "test_server_echo".into(),
            raw_input: json!({"text": "mcp"}).to_string(),
            input: json!({"text": "mcp"}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_3".into(),
            name: "echo".into(),
            raw_input: json!({"text": "native"}).to_string(),
            input: json!({"text": "native"}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Only the MCP-prefixed call should reach the context server.
    let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
    assert_eq!(tool_call_params.name, "echo");
    assert_eq!(tool_call_params.arguments, Some(json!({"text": "mcp"})));
    tool_call_response
        .send(context_server::types::CallToolResponse {
            content: vec![context_server::types::ToolResponseContent::Text { text: "mcp".into() }],
            is_error: None,
            meta: None,
            structured_content: None,
        })
        .unwrap();
    cx.run_until_parked();

    // Ensure the tool results were inserted with the correct names.
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages.last().unwrap().content,
        vec![
            MessageContent::ToolResult(LanguageModelToolResult {
                tool_use_id: "tool_3".into(),
                tool_name: "echo".into(),
                is_error: false,
                content: "native".into(),
                output: Some("native".into()),
            },),
            MessageContent::ToolResult(LanguageModelToolResult {
                tool_use_id: "tool_2".into(),
                tool_name: "test_server_echo".into(),
                is_error: false,
                content: "mcp".into(),
                output: Some("mcp".into()),
            },),
        ]
    );
    fake_model.end_last_completion_stream();
    events.collect::<Vec<_>>().await;
}
1442
#[gpui::test]
async fn test_mcp_tool_truncation(cx: &mut TestAppContext) {
    // Verifies how MCP tool names are disambiguated and truncated when
    // multiple context servers expose conflicting and/or over-long tool
    // names: conflicting names get a server prefix, and names at or over
    // MAX_TOOL_NAME_LENGTH are shortened (see the expected list at the end).
    let ThreadTest {
        model,
        thread,
        context_server_store,
        fs,
        ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Set up a profile with all tools enabled
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "profiles": {
                    "test": {
                        "name": "Test Profile",
                        "enable_all_context_servers": true,
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                            WordListTool::name(): true,
                            ToolRequiringPermission::name(): true,
                            InfiniteTool::name(): true,
                        }
                    },
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;
    cx.run_until_parked();

    thread.update(cx, |thread, cx| {
        thread.set_profile(AgentProfileId("test".into()), cx);
        thread.add_tool(EchoTool);
        thread.add_tool(DelayTool);
        thread.add_tool(WordListTool);
        thread.add_tool(ToolRequiringPermission);
        thread.add_tool(InfiniteTool);
    });

    // Set up multiple context servers with some overlapping tool names
    let _server1_calls = setup_context_server(
        "xxx",
        vec![
            context_server::types::Tool {
                name: "echo".into(), // Conflicts with native EchoTool
                description: None,
                input_schema: serde_json::to_value(EchoTool::input_schema(
                    LanguageModelToolSchemaFormat::JsonSchema,
                ))
                .unwrap(),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "unique_tool_1".into(),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );

    let _server2_calls = setup_context_server(
        "yyy",
        vec![
            context_server::types::Tool {
                name: "echo".into(), // Also conflicts with native EchoTool
                description: None,
                input_schema: serde_json::to_value(EchoTool::input_schema(
                    LanguageModelToolSchemaFormat::JsonSchema,
                ))
                .unwrap(),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "unique_tool_2".into(),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            // Near-limit names, duplicated across servers below, to exercise
            // prefixing + truncation together.
            context_server::types::Tool {
                name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );
    let _server3_calls = setup_context_server(
        "zzz",
        vec![
            context_server::types::Tool {
                name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            // Exceeds the limit even without a prefix.
            context_server::types::Tool {
                name: "c".repeat(MAX_TOOL_NAME_LENGTH + 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );

    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Go"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    // Expected names: conflicting "echo" tools become "xxx_echo"/"yyy_echo";
    // duplicated near-limit names get a truncated server prefix ("y_…",
    // "z_…"); over-limit names are cut to fit.
    assert_eq!(
        tool_names_for_completion(&completion),
        vec![
            "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
            "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
            "delay",
            "echo",
            "infinite",
            "tool_requiring_permission",
            "unique_tool_1",
            "unique_tool_2",
            "word_list",
            "xxx_echo",
            "y_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
            "yyy_echo",
            "z_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        ]
    );
}
1608
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_cancellation(cx: &mut TestAppContext) {
    // E2E test: cancelling mid-turn while a tool is still running must close
    // the event stream with a Cancelled stop reason, and the thread must
    // remain usable for a subsequent send.
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(InfiniteTool);
            thread.add_tool(EchoTool);
            thread.send(
                UserMessageId::new(),
                ["Call the echo tool, then call the infinite tool, then explain their output"],
                cx,
            )
        })
        .unwrap();

    // Wait until both tools are called.
    let mut expected_tools = vec!["Echo", "Infinite Tool"];
    let mut echo_id = None;
    let mut echo_completed = false;
    while let Some(event) = events.next().await {
        match event.unwrap() {
            ThreadEvent::ToolCall(tool_call) => {
                // Tools must be called in the order the prompt requested.
                assert_eq!(tool_call.title, expected_tools.remove(0));
                if tool_call.title == "Echo" {
                    echo_id = Some(tool_call.tool_call_id);
                }
            }
            // Track completion of the Echo call so we cancel while only the
            // infinite tool is still running.
            ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
                acp::ToolCallUpdate {
                    tool_call_id,
                    fields:
                        acp::ToolCallUpdateFields {
                            status: Some(acp::ToolCallStatus::Completed),
                            ..
                        },
                    ..
                },
            )) if Some(&tool_call_id) == echo_id.as_ref() => {
                echo_completed = true;
            }
            _ => {}
        }

        if expected_tools.is_empty() && echo_completed {
            break;
        }
    }

    // Cancel the current send and ensure that the event stream is closed, even
    // if one of the tools is still running.
    thread.update(cx, |thread, cx| thread.cancel(cx)).await;
    let events = events.collect::<Vec<_>>().await;
    let last_event = events.last();
    assert!(
        matches!(
            last_event,
            Some(Ok(ThreadEvent::Stop(acp::StopReason::Cancelled)))
        ),
        "unexpected event {last_event:?}"
    );

    // Ensure we can still send a new message after cancellation.
    let events = thread
        .update(cx, |thread, cx| {
            thread.send(
                UserMessageId::new(),
                ["Testing: reply with 'Hello' then stop."],
                cx,
            )
        })
        .unwrap()
        .collect::<Vec<_>>()
        .await;
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();
        assert_eq!(
            agent_message.content,
            vec![AgentMessageContent::Text("Hello".to_string())]
        );
    });
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
}
1694
#[gpui::test]
async fn test_terminal_tool_cancellation_captures_output(cx: &mut TestAppContext) {
    // Cancelling the thread while a terminal tool runs should kill the
    // terminal, stop with Cancelled, and preserve the terminal's partial
    // output in the tool result (not just a generic "canceled" message).
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A fake terminal whose command never exits on its own.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Cancel the thread while the terminal is running
    thread.update(cx, |thread, cx| thread.cancel(cx)).detach();

    // Collect remaining events, driving the executor to let cancellation complete
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on cancellation"
    );

    // Verify we got a cancellation stop event
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::Cancelled],
    );

    // Verify the tool result contains the terminal output, not just "Tool canceled by user"
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        // "partial output" comes from FakeTerminalHandle's output field
        assert!(
            result_text.contains("partial output"),
            "expected tool result to contain terminal output, got: {result_text}"
        );
        // Match the actual format from process_content in terminal_tool.rs
        assert!(
            result_text.contains("The user stopped this command"),
            "expected tool result to indicate user stopped, got: {result_text}"
        );
    });

    // Verify we can send a new message after cancellation
    verify_thread_recovery(&thread, &fake_model, cx).await;
}
1791
1792/// Helper to verify thread can recover after cancellation by sending a simple message.
1793async fn verify_thread_recovery(
1794 thread: &Entity<Thread>,
1795 fake_model: &FakeLanguageModel,
1796 cx: &mut TestAppContext,
1797) {
1798 let events = thread
1799 .update(cx, |thread, cx| {
1800 thread.send(
1801 UserMessageId::new(),
1802 ["Testing: reply with 'Hello' then stop."],
1803 cx,
1804 )
1805 })
1806 .unwrap();
1807 cx.run_until_parked();
1808 fake_model.send_last_completion_stream_text_chunk("Hello");
1809 fake_model
1810 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
1811 fake_model.end_last_completion_stream();
1812
1813 let events = events.collect::<Vec<_>>().await;
1814 thread.update(cx, |thread, _cx| {
1815 let message = thread.last_message().unwrap();
1816 let agent_message = message.as_agent_message().unwrap();
1817 assert_eq!(
1818 agent_message.content,
1819 vec![AgentMessageContent::Text("Hello".to_string())]
1820 );
1821 });
1822 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
1823}
1824
1825/// Waits for a terminal tool to start by watching for a ToolCallUpdate with terminal content.
1826async fn wait_for_terminal_tool_started(
1827 events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
1828 cx: &mut TestAppContext,
1829) {
1830 let deadline = cx.executor().num_cpus() * 100; // Scale with available parallelism
1831 for _ in 0..deadline {
1832 cx.run_until_parked();
1833
1834 while let Some(Some(event)) = events.next().now_or_never() {
1835 if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
1836 update,
1837 ))) = &event
1838 {
1839 if update.fields.content.as_ref().is_some_and(|content| {
1840 content
1841 .iter()
1842 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
1843 }) {
1844 return;
1845 }
1846 }
1847 }
1848
1849 cx.background_executor
1850 .timer(Duration::from_millis(10))
1851 .await;
1852 }
1853 panic!("terminal tool did not start within the expected time");
1854}
1855
1856/// Collects events until a Stop event is received, driving the executor to completion.
1857async fn collect_events_until_stop(
1858 events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
1859 cx: &mut TestAppContext,
1860) -> Vec<Result<ThreadEvent>> {
1861 let mut collected = Vec::new();
1862 let deadline = cx.executor().num_cpus() * 200;
1863
1864 for _ in 0..deadline {
1865 cx.executor().advance_clock(Duration::from_millis(10));
1866 cx.run_until_parked();
1867
1868 while let Some(Some(event)) = events.next().now_or_never() {
1869 let is_stop = matches!(&event, Ok(ThreadEvent::Stop(_)));
1870 collected.push(event);
1871 if is_stop {
1872 return collected;
1873 }
1874 }
1875 }
1876 panic!(
1877 "did not receive Stop event within the expected time; collected {} events",
1878 collected.len()
1879 );
1880}
1881
#[gpui::test]
async fn test_truncate_while_terminal_tool_running(cx: &mut TestAppContext) {
    // Truncating the thread at the message that spawned a running terminal
    // tool should kill the terminal, leave the thread empty, and keep the
    // thread usable afterwards.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A fake terminal whose command never exits on its own.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    // Keep the message id so we can truncate at this exact message later.
    let message_id = UserMessageId::new();
    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(message_id.clone(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Truncate the thread while the terminal is running
    thread
        .update(cx, |thread, cx| thread.truncate(message_id, cx))
        .unwrap();

    // Drive the executor to let cancellation complete
    let _ = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on truncate"
    );

    // Verify the thread is empty after truncation
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            "",
            "expected thread to be empty after truncating the only message"
        );
    });

    // Verify we can send a new message after truncation
    verify_thread_recovery(&thread, &fake_model, cx).await;
}
1948
#[gpui::test]
async fn test_cancel_multiple_concurrent_terminal_tools(cx: &mut TestAppContext) {
    // Tests that cancellation properly kills all running terminal tools when multiple are active.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // Environment that creates a fresh fake terminal handle per tool call,
    // so we can inspect each handle individually afterwards.
    let environment = Rc::new(MultiTerminalEnvironment::new());

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment.clone(),
            ));
            thread.send(UserMessageId::new(), ["run multiple commands"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling two terminal tools
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_2".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 2000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 2000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for both terminal tools to start by counting terminal content updates
    let mut terminals_started = 0;
    let deadline = cx.executor().num_cpus() * 100;
    for _ in 0..deadline {
        cx.run_until_parked();

        // Drain ready events, counting updates that carry terminal content.
        while let Some(Some(event)) = events.next().now_or_never() {
            if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
                update,
            ))) = &event
            {
                if update.fields.content.as_ref().is_some_and(|content| {
                    content
                        .iter()
                        .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
                }) {
                    terminals_started += 1;
                    if terminals_started >= 2 {
                        break;
                    }
                }
            }
        }
        if terminals_started >= 2 {
            break;
        }

        cx.background_executor
            .timer(Duration::from_millis(10))
            .await;
    }
    assert!(
        terminals_started >= 2,
        "expected 2 terminal tools to start, got {terminals_started}"
    );

    // Cancel the thread while both terminals are running
    thread.update(cx, |thread, cx| thread.cancel(cx)).detach();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify both terminal handles were killed
    let handles = environment.handles();
    assert_eq!(
        handles.len(),
        2,
        "expected 2 terminal handles to be created"
    );
    assert!(
        handles[0].was_killed(),
        "expected first terminal handle to be killed on cancellation"
    );
    assert!(
        handles[1].was_killed(),
        "expected second terminal handle to be killed on cancellation"
    );

    // Verify we got a cancellation stop event
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::Cancelled],
    );
}
2057
#[gpui::test]
async fn test_terminal_tool_stopped_via_terminal_card_button(cx: &mut TestAppContext) {
    // Tests that clicking the stop button on the terminal card (as opposed to the main
    // cancel button) properly reports user stopped via the was_stopped_by_user path.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A fake terminal whose command never exits on its own; we end it
    // manually below to simulate the card's stop button.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Simulate user clicking stop on the terminal card itself.
    // This sets the flag and signals exit (simulating what the real UI would do).
    handle.set_stopped_by_user(true);
    handle.killed.store(true, Ordering::SeqCst);
    handle.signal_exit();

    // Wait for the tool to complete
    cx.run_until_parked();

    // The thread continues after tool completion - simulate the model ending its turn
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify we got an EndTurn (not Cancelled, since we didn't cancel the thread)
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::EndTurn],
    );

    // Verify the tool result indicates user stopped
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        assert!(
            result_text.contains("The user stopped this command"),
            "expected tool result to indicate user stopped, got: {result_text}"
        );
    });
}
2152
/// Verifies that when the terminal tool is invoked with a `timeout_ms` and the
/// command outlives it, the terminal handle is killed and the tool result
/// reports a timeout rather than a user-initiated stop.
#[gpui::test]
async fn test_terminal_tool_timeout_expires(cx: &mut TestAppContext) {
    // Tests that when a timeout is configured and expires, the tool result indicates timeout.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // The fake command never exits on its own, so only the timeout can end it.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command with timeout"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool with a short timeout
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": ".", "timeout_ms": 100}"#.into(),
            input: json!({"command": "sleep 1000", "cd": ".", "timeout_ms": 100}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Advance clock past the timeout (100ms requested, 200ms elapsed)
    cx.executor().advance_clock(Duration::from_millis(200));
    cx.run_until_parked();

    // The thread continues after tool completion - simulate the model ending its turn
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed due to timeout
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on timeout"
    );

    // Verify we got an EndTurn (the tool completed, just with timeout)
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::EndTurn],
    );

    // Verify the tool result indicates timeout, not user stopped
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        // Locate the tool use recorded in the agent message...
        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        // ...and its corresponding result, keyed by the tool-use id.
        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        assert!(
            result_text.contains("timed out"),
            "expected tool result to indicate timeout, got: {result_text}"
        );
        assert!(
            !result_text.contains("The user stopped"),
            "tool result should not mention user stopped when it timed out, got: {result_text}"
        );
    });
}
2251
/// Verifies that issuing a new `send` while a previous turn is still streaming
/// cancels the in-flight turn: the first event stream ends with `Cancelled`
/// while the second turn completes normally with `EndTurn`.
#[gpui::test]
async fn test_in_progress_send_canceled_by_next_send(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Start a first turn and let it stream a partial response.
    let events_1 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 1!");
    cx.run_until_parked();

    // Send a second message before the first turn has finished.
    let events_2 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 2!");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    // The first turn was canceled by the second; the second ran to completion.
    let events_1 = events_1.collect::<Vec<_>>().await;
    assert_eq!(stop_events(events_1), vec![acp::StopReason::Cancelled]);
    let events_2 = events_2.collect::<Vec<_>>().await;
    assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
}
2282
/// Verifies that a second `send` issued after the first turn has fully
/// completed does not cancel anything: both turns end with `EndTurn`.
#[gpui::test]
async fn test_subsequent_successful_sends_dont_cancel(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First turn: stream a response and end it before sending again.
    let events_1 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 1!");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();
    let events_1 = events_1.collect::<Vec<_>>().await;

    // Second turn, started only after the first one finished.
    let events_2 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 2!");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();
    let events_2 = events_2.collect::<Vec<_>>().await;

    assert_eq!(stop_events(events_1), vec![acp::StopReason::EndTurn]);
    assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
}
2315
/// Verifies that a `Refusal` stop reason rolls the thread back: the partial
/// assistant response and the triggering user message are both removed,
/// leaving the thread's markdown empty.
#[gpui::test]
async fn test_refusal(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello
            "}
        );
    });

    // Stream part of a response before the refusal arrives.
    fake_model.send_last_completion_stream_text_chunk("Hey!");
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello

                ## Assistant

                Hey!
            "}
        );
    });

    // If the model refuses to continue, the thread should remove all the messages after the last user message.
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::Refusal));
    let events = events.collect::<Vec<_>>().await;
    assert_eq!(stop_events(events), vec![acp::StopReason::Refusal]);
    thread.read_with(cx, |thread, _| {
        assert_eq!(thread.to_markdown(), "");
    });
}
2364
2365#[gpui::test]
2366async fn test_truncate_first_message(cx: &mut TestAppContext) {
2367 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2368 let fake_model = model.as_fake();
2369
2370 let message_id = UserMessageId::new();
2371 thread
2372 .update(cx, |thread, cx| {
2373 thread.send(message_id.clone(), ["Hello"], cx)
2374 })
2375 .unwrap();
2376 cx.run_until_parked();
2377 thread.read_with(cx, |thread, _| {
2378 assert_eq!(
2379 thread.to_markdown(),
2380 indoc! {"
2381 ## User
2382
2383 Hello
2384 "}
2385 );
2386 assert_eq!(thread.latest_token_usage(), None);
2387 });
2388
2389 fake_model.send_last_completion_stream_text_chunk("Hey!");
2390 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
2391 language_model::TokenUsage {
2392 input_tokens: 32_000,
2393 output_tokens: 16_000,
2394 cache_creation_input_tokens: 0,
2395 cache_read_input_tokens: 0,
2396 },
2397 ));
2398 cx.run_until_parked();
2399 thread.read_with(cx, |thread, _| {
2400 assert_eq!(
2401 thread.to_markdown(),
2402 indoc! {"
2403 ## User
2404
2405 Hello
2406
2407 ## Assistant
2408
2409 Hey!
2410 "}
2411 );
2412 assert_eq!(
2413 thread.latest_token_usage(),
2414 Some(acp_thread::TokenUsage {
2415 used_tokens: 32_000 + 16_000,
2416 max_tokens: 1_000_000,
2417 output_tokens: 16_000,
2418 })
2419 );
2420 });
2421
2422 thread
2423 .update(cx, |thread, cx| thread.truncate(message_id, cx))
2424 .unwrap();
2425 cx.run_until_parked();
2426 thread.read_with(cx, |thread, _| {
2427 assert_eq!(thread.to_markdown(), "");
2428 assert_eq!(thread.latest_token_usage(), None);
2429 });
2430
2431 // Ensure we can still send a new message after truncation.
2432 thread
2433 .update(cx, |thread, cx| {
2434 thread.send(UserMessageId::new(), ["Hi"], cx)
2435 })
2436 .unwrap();
2437 thread.update(cx, |thread, _cx| {
2438 assert_eq!(
2439 thread.to_markdown(),
2440 indoc! {"
2441 ## User
2442
2443 Hi
2444 "}
2445 );
2446 });
2447 cx.run_until_parked();
2448 fake_model.send_last_completion_stream_text_chunk("Ahoy!");
2449 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
2450 language_model::TokenUsage {
2451 input_tokens: 40_000,
2452 output_tokens: 20_000,
2453 cache_creation_input_tokens: 0,
2454 cache_read_input_tokens: 0,
2455 },
2456 ));
2457 cx.run_until_parked();
2458 thread.read_with(cx, |thread, _| {
2459 assert_eq!(
2460 thread.to_markdown(),
2461 indoc! {"
2462 ## User
2463
2464 Hi
2465
2466 ## Assistant
2467
2468 Ahoy!
2469 "}
2470 );
2471
2472 assert_eq!(
2473 thread.latest_token_usage(),
2474 Some(acp_thread::TokenUsage {
2475 used_tokens: 40_000 + 20_000,
2476 max_tokens: 1_000_000,
2477 output_tokens: 20_000,
2478 })
2479 );
2480 });
2481}
2482
/// Verifies that truncating at the second user message removes only the second
/// exchange, restoring the thread (content and token usage) to its state after
/// the first exchange.
#[gpui::test]
async fn test_truncate_second_message(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First exchange: message, response, and a usage update.
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Message 1 response");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 32_000,
            output_tokens: 16_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Reusable snapshot assertion: the expected state after only the first
    // exchange, checked both before the second send and after truncation.
    let assert_first_message_state = |cx: &mut TestAppContext| {
        thread.clone().read_with(cx, |thread, _| {
            assert_eq!(
                thread.to_markdown(),
                indoc! {"
                    ## User

                    Message 1

                    ## Assistant

                    Message 1 response
                "}
            );

            assert_eq!(
                thread.latest_token_usage(),
                Some(acp_thread::TokenUsage {
                    used_tokens: 32_000 + 16_000,
                    max_tokens: 1_000_000,
                    output_tokens: 16_000,
                })
            );
        });
    };

    assert_first_message_state(cx);

    // Second exchange, whose id is used for the truncation below.
    let second_message_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(second_message_id.clone(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Message 2 response");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 40_000,
            output_tokens: 20_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Message 1

                ## Assistant

                Message 1 response

                ## User

                Message 2

                ## Assistant

                Message 2 response
            "}
        );

        assert_eq!(
            thread.latest_token_usage(),
            Some(acp_thread::TokenUsage {
                used_tokens: 40_000 + 20_000,
                max_tokens: 1_000_000,
                output_tokens: 20_000,
            })
        );
    });

    thread
        .update(cx, |thread, cx| thread.truncate(second_message_id, cx))
        .unwrap();
    cx.run_until_parked();

    // Truncation should roll back to the post-first-exchange snapshot.
    assert_first_message_state(cx);
}
2593
/// Verifies that a title is generated via the summarization model after the
/// first exchange (truncated at the first newline), and that subsequent
/// messages do not trigger another title generation.
#[gpui::test]
async fn test_title_generation(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Separate fake model used only for summarization/title generation.
    let summary_model = Arc::new(FakeLanguageModel::default());
    thread.update(cx, |thread, cx| {
        thread.set_summarization_model(Some(summary_model.clone()), cx)
    });

    let send = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Hey!");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    // Title stays at the default until the summary stream completes.
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "New Thread"));

    // Ensure the summary model has been invoked to generate a title.
    summary_model.send_last_completion_stream_text_chunk("Hello ");
    summary_model.send_last_completion_stream_text_chunk("world\nG");
    summary_model.send_last_completion_stream_text_chunk("oodnight Moon");
    summary_model.end_last_completion_stream();
    send.collect::<Vec<_>>().await;
    cx.run_until_parked();
    // Only the first line of the streamed summary becomes the title.
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));

    // Send another message, ensuring no title is generated this time.
    let send = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello again"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey again!");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    assert_eq!(summary_model.pending_completions(), Vec::new());
    send.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));
}
2639
/// Verifies that when a completion request is built while one tool call is
/// still pending (awaiting permission), the pending tool use is omitted from
/// the request while the completed tool use and its result are included.
#[gpui::test]
async fn test_building_request_with_pending_tools(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let _events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(ToolRequiringPermission);
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["Hey!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // This tool stays pending: it requires permission that is never granted.
    let permission_tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: ToolRequiringPermission::name().into(),
        raw_input: "{}".into(),
        input: json!({}),
        is_input_complete: true,
        thought_signature: None,
    };
    // This tool runs to completion immediately.
    let echo_tool_use = LanguageModelToolUse {
        id: "tool_id_2".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model.send_last_completion_stream_text_chunk("Hi!");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        permission_tool_use,
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        echo_tool_use.clone(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Ensure pending tools are skipped when building a request.
    let request = thread
        .read_with(cx, |thread, cx| {
            thread.build_completion_request(CompletionIntent::EditFile, cx)
        })
        .unwrap();
    // messages[0] is skipped here; only the conversation tail is asserted.
    assert_eq!(
        request.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Hey!".into()],
                cache: true,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![
                    MessageContent::Text("Hi!".into()),
                    MessageContent::ToolUse(echo_tool_use.clone())
                ],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(LanguageModelToolResult {
                    tool_use_id: echo_tool_use.id.clone(),
                    tool_name: echo_tool_use.name,
                    is_error: false,
                    content: "test".into(),
                    output: Some("test".into())
                })],
                cache: false,
                reasoning_details: None,
            },
        ],
    );
}
2719
/// End-to-end test of `NativeAgentConnection`: creating a thread, listing and
/// selecting models, prompting, canceling, and verifying that dropping the
/// ACP thread tears down the underlying session.
#[gpui::test]
async fn test_agent_connection(cx: &mut TestAppContext) {
    cx.update(settings::init);
    let templates = Templates::new();

    // Initialize language model system with test provider
    cx.update(|cx| {
        gpui_tokio::init(cx);

        let http_client = FakeHttpClient::with_404_response();
        let clock = Arc::new(clock::FakeSystemClock::new());
        let client = Client::new(clock, http_client, cx);
        let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
        language_model::init(client.clone(), cx);
        language_models::init(user_store, client.clone(), cx);
        LanguageModelRegistry::test(cx);
    });
    cx.executor().forbid_parking();

    // Create a project for new_thread
    let fake_fs = cx.update(|cx| fs::FakeFs::new(cx.background_executor().clone()));
    fake_fs.insert_tree(path!("/test"), json!({})).await;
    let project = Project::test(fake_fs.clone(), [Path::new("/test")], cx).await;
    let cwd = Path::new("/test");
    let thread_store = cx.new(|cx| ThreadStore::new(cx));

    // Create agent and connection
    let agent = NativeAgent::new(
        project.clone(),
        thread_store,
        templates.clone(),
        None,
        fake_fs.clone(),
        &mut cx.to_async(),
    )
    .await
    .unwrap();
    let connection = NativeAgentConnection(agent.clone());

    // Create a thread using new_thread
    let connection_rc = Rc::new(connection.clone());
    let acp_thread = cx
        .update(|cx| connection_rc.new_thread(project, cwd, cx))
        .await
        .expect("new_thread should succeed");

    // Get the session_id from the AcpThread
    let session_id = acp_thread.read_with(cx, |thread, _| thread.session_id().clone());

    // Test model_selector returns Some
    let selector_opt = connection.model_selector(&session_id);
    assert!(
        selector_opt.is_some(),
        "agent should always support ModelSelector"
    );
    let selector = selector_opt.unwrap();

    // Test list_models
    let listed_models = cx
        .update(|cx| selector.list_models(cx))
        .await
        .expect("list_models should succeed");
    let AgentModelList::Grouped(listed_models) = listed_models else {
        panic!("Unexpected model list type");
    };
    assert!(!listed_models.is_empty(), "should have at least one model");
    assert_eq!(
        listed_models[&AgentModelGroupName("Fake".into())][0]
            .id
            .0
            .as_ref(),
        "fake/fake"
    );

    // Test selected_model returns the default
    let model = cx
        .update(|cx| selector.selected_model(cx))
        .await
        .expect("selected_model should succeed");
    let model = cx
        .update(|cx| agent.read(cx).models().model_from_id(&model.id))
        .unwrap();
    let model = model.as_fake();
    assert_eq!(model.id().0, "fake", "should return default model");

    // Prompt the thread and stream a chunk back through the fake model.
    let request = acp_thread.update(cx, |thread, cx| thread.send(vec!["abc".into()], cx));
    cx.run_until_parked();
    model.send_last_completion_stream_text_chunk("def");
    cx.run_until_parked();
    acp_thread.read_with(cx, |thread, cx| {
        assert_eq!(
            thread.to_markdown(cx),
            indoc! {"
                ## User

                abc

                ## Assistant

                def

            "}
        )
    });

    // Test cancel
    cx.update(|cx| connection.cancel(&session_id, cx));
    request.await.expect("prompt should fail gracefully");

    // Ensure that dropping the ACP thread causes the native thread to be
    // dropped as well.
    cx.update(|_| drop(acp_thread));
    let result = cx
        .update(|cx| {
            connection.prompt(
                Some(acp_thread::UserMessageId::new()),
                acp::PromptRequest::new(session_id.clone(), vec!["ghi".into()]),
                cx,
            )
        })
        .await;
    // Prompting a dropped session must fail with "Session not found".
    assert_eq!(
        result.as_ref().unwrap_err().to_string(),
        "Session not found",
        "unexpected result: {:?}",
        result
    );
}
2848
/// Verifies the sequence of ACP tool-call updates emitted while a tool's
/// input streams in and it then runs to completion: initial `ToolCall`,
/// input-complete update, `InProgress`, content, and finally `Completed`
/// with raw output.
#[gpui::test]
async fn test_tool_updates_to_completion(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    thread.update(cx, |thread, _cx| thread.add_tool(ThinkingTool));
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Think"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Simulate streaming partial input.
    let input = json!({});
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "1".into(),
            name: ThinkingTool::name().into(),
            raw_input: input.to_string(),
            input,
            is_input_complete: false,
            thought_signature: None,
        },
    ));

    // Input streaming completed
    let input = json!({ "content": "Thinking hard!" });
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "1".into(),
            name: "thinking".into(),
            raw_input: input.to_string(),
            input,
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // First event: the tool call is surfaced with its partial (empty) input.
    let tool_call = expect_tool_call(&mut events).await;
    assert_eq!(
        tool_call,
        acp::ToolCall::new("1", "Thinking")
            .kind(acp::ToolKind::Think)
            .raw_input(json!({}))
            .meta(acp::Meta::from_iter([(
                "tool_name".into(),
                "thinking".into()
            )]))
    );
    // Then the completed input is reflected in an update.
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new()
                .title("Thinking")
                .kind(acp::ToolKind::Think)
                .raw_input(json!({ "content": "Thinking hard!"}))
        )
    );
    // The tool starts executing...
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new().status(acp::ToolCallStatus::InProgress)
        )
    );
    // ...streams its content...
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new().content(vec!["Thinking hard!".into()])
        )
    );
    // ...and completes with its raw output.
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new()
                .status(acp::ToolCallStatus::Completed)
                .raw_output("Finished thinking.")
        )
    );
}
2939
2940#[gpui::test]
2941async fn test_send_no_retry_on_success(cx: &mut TestAppContext) {
2942 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
2943 let fake_model = model.as_fake();
2944
2945 let mut events = thread
2946 .update(cx, |thread, cx| {
2947 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
2948 thread.send(UserMessageId::new(), ["Hello!"], cx)
2949 })
2950 .unwrap();
2951 cx.run_until_parked();
2952
2953 fake_model.send_last_completion_stream_text_chunk("Hey!");
2954 fake_model.end_last_completion_stream();
2955
2956 let mut retry_events = Vec::new();
2957 while let Some(Ok(event)) = events.next().await {
2958 match event {
2959 ThreadEvent::Retry(retry_status) => {
2960 retry_events.push(retry_status);
2961 }
2962 ThreadEvent::Stop(..) => break,
2963 _ => {}
2964 }
2965 }
2966
2967 assert_eq!(retry_events.len(), 0);
2968 thread.read_with(cx, |thread, _cx| {
2969 assert_eq!(
2970 thread.to_markdown(),
2971 indoc! {"
2972 ## User
2973
2974 Hello!
2975
2976 ## Assistant
2977
2978 Hey!
2979 "}
2980 )
2981 });
2982}
2983
2984#[gpui::test]
2985async fn test_send_retry_on_error(cx: &mut TestAppContext) {
2986 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
2987 let fake_model = model.as_fake();
2988
2989 let mut events = thread
2990 .update(cx, |thread, cx| {
2991 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
2992 thread.send(UserMessageId::new(), ["Hello!"], cx)
2993 })
2994 .unwrap();
2995 cx.run_until_parked();
2996
2997 fake_model.send_last_completion_stream_text_chunk("Hey,");
2998 fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
2999 provider: LanguageModelProviderName::new("Anthropic"),
3000 retry_after: Some(Duration::from_secs(3)),
3001 });
3002 fake_model.end_last_completion_stream();
3003
3004 cx.executor().advance_clock(Duration::from_secs(3));
3005 cx.run_until_parked();
3006
3007 fake_model.send_last_completion_stream_text_chunk("there!");
3008 fake_model.end_last_completion_stream();
3009 cx.run_until_parked();
3010
3011 let mut retry_events = Vec::new();
3012 while let Some(Ok(event)) = events.next().await {
3013 match event {
3014 ThreadEvent::Retry(retry_status) => {
3015 retry_events.push(retry_status);
3016 }
3017 ThreadEvent::Stop(..) => break,
3018 _ => {}
3019 }
3020 }
3021
3022 assert_eq!(retry_events.len(), 1);
3023 assert!(matches!(
3024 retry_events[0],
3025 acp_thread::RetryStatus { attempt: 1, .. }
3026 ));
3027 thread.read_with(cx, |thread, _cx| {
3028 assert_eq!(
3029 thread.to_markdown(),
3030 indoc! {"
3031 ## User
3032
3033 Hello!
3034
3035 ## Assistant
3036
3037 Hey,
3038
3039 [resume]
3040
3041 ## Assistant
3042
3043 there!
3044 "}
3045 )
3046 });
3047}
3048
/// Verifies that when the stream errors after a tool use was received, the
/// tool still runs to completion and the retried request includes both the
/// tool use and its result.
#[gpui::test]
async fn test_send_retry_finishes_tool_calls_on_error(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["Call the echo tool!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use_1 = LanguageModelToolUse {
        id: "tool_1".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    // Deliver the tool use, then fail the stream with a retryable error.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        tool_use_1.clone(),
    ));
    fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
        provider: LanguageModelProviderName::new("Anthropic"),
        retry_after: Some(Duration::from_secs(3)),
    });
    fake_model.end_last_completion_stream();

    // After the retry delay, the retried request must already contain the
    // tool use and its completed result.
    cx.executor().advance_clock(Duration::from_secs(3));
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Call the echo tool!".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![language_model::MessageContent::ToolUse(tool_use_1.clone())],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![language_model::MessageContent::ToolResult(
                    LanguageModelToolResult {
                        tool_use_id: tool_use_1.id.clone(),
                        tool_name: tool_use_1.name.clone(),
                        is_error: false,
                        content: "test".into(),
                        output: Some("test".into())
                    }
                )],
                cache: true,
                reasoning_details: None,
            },
        ]
    );

    fake_model.send_last_completion_stream_text_chunk("Done");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    events.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message(),
            Some(Message::Agent(AgentMessage {
                content: vec![AgentMessageContent::Text("Done".into())],
                tool_results: IndexMap::default(),
                reasoning_details: None,
            }))
        );
    })
}
3129
/// Verifies that after `MAX_RETRY_ATTEMPTS` consecutive retryable failures the
/// thread gives up: exactly `MAX_RETRY_ATTEMPTS` retry events are emitted with
/// increasing attempt numbers, followed by a single terminal error.
#[gpui::test]
async fn test_send_max_retries_exceeded(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.send(UserMessageId::new(), ["Hello!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Fail one more time than the retry budget allows.
    for _ in 0..crate::thread::MAX_RETRY_ATTEMPTS + 1 {
        fake_model.send_last_completion_stream_error(
            LanguageModelCompletionError::ServerOverloaded {
                provider: LanguageModelProviderName::new("Anthropic"),
                retry_after: Some(Duration::from_secs(3)),
            },
        );
        fake_model.end_last_completion_stream();
        cx.executor().advance_clock(Duration::from_secs(3));
        cx.run_until_parked();
    }

    // Collect retries and the terminal error from the event stream.
    let mut errors = Vec::new();
    let mut retry_events = Vec::new();
    while let Some(event) = events.next().await {
        match event {
            Ok(ThreadEvent::Retry(retry_status)) => {
                retry_events.push(retry_status);
            }
            Ok(ThreadEvent::Stop(..)) => break,
            Err(error) => errors.push(error),
            _ => {}
        }
    }

    assert_eq!(
        retry_events.len(),
        crate::thread::MAX_RETRY_ATTEMPTS as usize
    );
    // Attempts are numbered starting at 1.
    for i in 0..crate::thread::MAX_RETRY_ATTEMPTS as usize {
        assert_eq!(retry_events[i].attempt, i + 1);
    }
    assert_eq!(errors.len(), 1);
    let error = errors[0]
        .downcast_ref::<LanguageModelCompletionError>()
        .unwrap();
    assert!(matches!(
        error,
        LanguageModelCompletionError::ServerOverloaded { .. }
    ));
}
3184
3185/// Filters out the stop events for asserting against in tests
3186fn stop_events(result_events: Vec<Result<ThreadEvent>>) -> Vec<acp::StopReason> {
3187 result_events
3188 .into_iter()
3189 .filter_map(|event| match event.unwrap() {
3190 ThreadEvent::Stop(stop_reason) => Some(stop_reason),
3191 _ => None,
3192 })
3193 .collect()
3194}
3195
/// Bundle of entities produced by [`setup`] that tests use to drive a thread.
struct ThreadTest {
    // The active language model; a `FakeLanguageModel` for `TestModel::Fake`.
    model: Arc<dyn LanguageModel>,
    // The thread under test.
    thread: Entity<Thread>,
    project_context: Entity<ProjectContext>,
    context_server_store: Entity<ContextServerStore>,
    // In-memory filesystem backing the test project and settings file.
    fs: Arc<FakeFs>,
}
3203
/// Which language model a test runs against.
enum TestModel {
    /// A real Claude model resolved from the registry; `setup` configures a
    /// production client for this variant.
    Sonnet4,
    /// The in-process `FakeLanguageModel`, used by most tests.
    Fake,
}
3208
3209impl TestModel {
3210 fn id(&self) -> LanguageModelId {
3211 match self {
3212 TestModel::Sonnet4 => LanguageModelId("claude-sonnet-4-latest".into()),
3213 TestModel::Fake => unreachable!(),
3214 }
3215 }
3216}
3217
/// Builds the shared test fixture: a `FakeFs` seeded with a settings file
/// that enables all of the test tools under a "test-profile" agent profile,
/// a test `Project` rooted at `/test`, and a `Thread` backed by either a
/// `FakeLanguageModel` or a real Sonnet 4 model (the latter path creates a
/// production client and authenticates over HTTP).
async fn setup(cx: &mut TestAppContext, model: TestModel) -> ThreadTest {
    // The real-model path performs blocking work; let the test executor
    // park instead of panicking.
    cx.executor().allow_parking();

    let fs = FakeFs::new(cx.background_executor.clone());
    fs.create_dir(paths::settings_file().parent().unwrap())
        .await
        .unwrap();
    // Seed user settings so the default agent profile grants every test
    // tool plus the built-in terminal tool.
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "default_profile": "test-profile",
                "profiles": {
                    "test-profile": {
                        "name": "Test Profile",
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                            WordListTool::name(): true,
                            ToolRequiringPermission::name(): true,
                            InfiniteTool::name(): true,
                            ThinkingTool::name(): true,
                            "terminal": true,
                        }
                    }
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;

    cx.update(|cx| {
        settings::init(cx);

        match model {
            // The fake model needs no client or provider registration.
            TestModel::Fake => {}
            TestModel::Sonnet4 => {
                gpui_tokio::init(cx);
                let http_client = ReqwestClient::user_agent("agent tests").unwrap();
                cx.set_http_client(Arc::new(http_client));
                let client = Client::production(cx);
                let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
                language_model::init(client.clone(), cx);
                language_models::init(user_store, client.clone(), cx);
            }
        };

        // Keep the global SettingsStore in sync with edits tests make to
        // the settings file on the fake fs.
        watch_settings(fs.clone(), cx);
    });

    let templates = Templates::new();

    fs.insert_tree(path!("/test"), json!({})).await;
    let project = Project::test(fs.clone(), [path!("/test").as_ref()], cx).await;

    let model = cx
        .update(|cx| {
            if let TestModel::Fake = model {
                Task::ready(Arc::new(FakeLanguageModel::default()) as Arc<_>)
            } else {
                // Look the real model up in the registry and authenticate
                // its provider before returning it.
                let model_id = model.id();
                let models = LanguageModelRegistry::read_global(cx);
                let model = models
                    .available_models(cx)
                    .find(|model| model.id() == model_id)
                    .unwrap();

                let provider = models.provider(&model.provider_id()).unwrap();
                let authenticated = provider.authenticate(cx);

                cx.spawn(async move |_cx| {
                    authenticated.await.unwrap();
                    model
                })
            }
        })
        .await;

    let project_context = cx.new(|_cx| ProjectContext::default());
    let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
    let context_server_registry =
        cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
    let thread = cx.new(|cx| {
        Thread::new(
            project,
            project_context.clone(),
            context_server_registry,
            templates,
            Some(model.clone()),
            cx,
        )
    });
    ThreadTest {
        model,
        thread,
        project_context,
        context_server_store,
        fs,
    }
}
3320
#[cfg(test)]
#[ctor::ctor]
/// Runs once at binary startup (via `ctor`): installs `env_logger`, but
/// only when the developer opted in by setting `RUST_LOG`.
fn init_logger() {
    let logging_requested = std::env::var("RUST_LOG").is_ok();
    if logging_requested {
        env_logger::init();
    }
}
3328
/// Spawns a detached background task that watches the settings file on `fs`
/// and pushes every new version of its contents into the global
/// `SettingsStore`, so tests can change settings by editing the fake fs.
fn watch_settings(fs: Arc<dyn Fs>, cx: &mut App) {
    let fs = fs.clone();
    cx.spawn({
        async move |cx| {
            let mut new_settings_content_rx = settings::watch_config_file(
                cx.background_executor(),
                fs,
                paths::settings_file().clone(),
            );

            while let Some(new_settings_content) = new_settings_content_rx.next().await {
                // `.ok()`: if the app context is gone (test torn down),
                // silently stop applying updates.
                cx.update(|cx| {
                    SettingsStore::update_global(cx, |settings, cx| {
                        settings.set_user_settings(&new_settings_content, cx)
                    })
                })
                .ok();
            }
        }
    })
    .detach();
}
3351
3352fn tool_names_for_completion(completion: &LanguageModelRequest) -> Vec<String> {
3353 completion
3354 .tools
3355 .iter()
3356 .map(|tool| tool.name.clone())
3357 .collect()
3358}
3359
/// Registers a fake stdio context server named `name` in project settings,
/// starts it on a fake transport that serves `tools`, and returns a channel
/// yielding each incoming `CallTool` request paired with a oneshot sender
/// the test uses to provide the response.
fn setup_context_server(
    name: &'static str,
    tools: Vec<context_server::types::Tool>,
    context_server_store: &Entity<ContextServerStore>,
    cx: &mut TestAppContext,
) -> mpsc::UnboundedReceiver<(
    context_server::types::CallToolParams,
    oneshot::Sender<context_server::types::CallToolResponse>,
)> {
    // Add the server to project settings so the store considers it enabled.
    // The command is never actually spawned — the fake transport below
    // intercepts all traffic.
    cx.update(|cx| {
        let mut settings = ProjectSettings::get_global(cx).clone();
        settings.context_servers.insert(
            name.into(),
            project::project_settings::ContextServerSettings::Stdio {
                enabled: true,
                command: ContextServerCommand {
                    path: "somebinary".into(),
                    args: Vec::new(),
                    env: None,
                    timeout: None,
                },
            },
        );
        ProjectSettings::override_global(settings, cx);
    });

    let (mcp_tool_calls_tx, mcp_tool_calls_rx) = mpsc::unbounded();
    let fake_transport = context_server::test::create_fake_transport(name, cx.executor())
        // Handshake: advertise tool support with list_changed notifications.
        .on_request::<context_server::types::requests::Initialize, _>(move |_params| async move {
            context_server::types::InitializeResponse {
                protocol_version: context_server::types::ProtocolVersion(
                    context_server::types::LATEST_PROTOCOL_VERSION.to_string(),
                ),
                server_info: context_server::types::Implementation {
                    name: name.into(),
                    version: "1.0.0".to_string(),
                },
                capabilities: context_server::types::ServerCapabilities {
                    tools: Some(context_server::types::ToolsCapabilities {
                        list_changed: Some(true),
                    }),
                    ..Default::default()
                },
                meta: None,
            }
        })
        // Serve the caller-provided tool list.
        .on_request::<context_server::types::requests::ListTools, _>(move |_params| {
            let tools = tools.clone();
            async move {
                context_server::types::ListToolsResponse {
                    tools,
                    next_cursor: None,
                    meta: None,
                }
            }
        })
        // Forward each tool call to the test via the channel and block the
        // response on the test sending one back through the oneshot.
        .on_request::<context_server::types::requests::CallTool, _>(move |params| {
            let mcp_tool_calls_tx = mcp_tool_calls_tx.clone();
            async move {
                let (response_tx, response_rx) = oneshot::channel();
                mcp_tool_calls_tx
                    .unbounded_send((params, response_tx))
                    .unwrap();
                response_rx.await.unwrap()
            }
        });
    context_server_store.update(cx, |store, cx| {
        store.start_server(
            Arc::new(ContextServer::new(
                ContextServerId(name.into()),
                Arc::new(fake_transport),
            )),
            cx,
        );
    });
    // Let the handshake and tool listing complete before returning.
    cx.run_until_parked();
    mcp_tool_calls_rx
}
3438
/// Verifies `Thread::tokens_before_message`: each user message reports the
/// `input_tokens` of the *previous* request (which already includes the
/// prior context), and the first message always reports `None`.
#[gpui::test]
async fn test_tokens_before_message(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First message
    let message_1_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_1_id.clone(), ["First message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Before any response, tokens_before_message should return None for first message
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should have no tokens before it"
        );
    });

    // Complete first message with usage
    fake_model.send_last_completion_stream_text_chunk("Response 1");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // First message still has no tokens before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should still have no tokens before it after response"
        );
    });

    // Second message
    let message_2_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_2_id.clone(), ["Second message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Second message should have first message's input tokens before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            Some(100),
            "Second message should have 100 tokens before it (from first request)"
        );
    });

    // Complete second message
    fake_model.send_last_completion_stream_text_chunk("Response 2");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 250, // Total for this request (includes previous context)
            output_tokens: 75,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Third message
    let message_3_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_3_id.clone(), ["Third message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Third message should have second message's input tokens (250) before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_3_id),
            Some(250),
            "Third message should have 250 tokens before it (from second request)"
        );
        // Second message should still have 100
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            Some(100),
            "Second message should still have 100 tokens before it"
        );
        // First message still has none
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should still have no tokens before it"
        );
    });
}
3545
/// Verifies that after `Thread::truncate` removes a message (and everything
/// after it), `tokens_before_message` returns `None` for the removed id
/// while surviving messages are unaffected.
#[gpui::test]
async fn test_tokens_before_message_after_truncate(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Set up three messages with responses
    let message_1_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_1_id.clone(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Response 1");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    let message_2_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_2_id.clone(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Response 2");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 250,
            output_tokens: 75,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Verify initial state
    thread.read_with(cx, |thread, _| {
        assert_eq!(thread.tokens_before_message(&message_2_id), Some(100));
    });

    // Truncate at message 2 (removes message 2 and everything after)
    thread
        .update(cx, |thread, cx| thread.truncate(message_2_id.clone(), cx))
        .unwrap();
    cx.run_until_parked();

    // After truncation, message_2_id no longer exists, so lookup should return None
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            None,
            "After truncation, message 2 no longer exists"
        );
        // Message 1 still exists but has no tokens before it
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message still has no tokens before it"
        );
    });
}
3616
/// Exercises terminal-tool permission rules end to end across four
/// scenarios: a deny regex blocks a matching command; an allow regex skips
/// confirmation (even when the default mode is Deny); a confirm regex
/// forces authorization even with `always_allow_tool_actions = true`; and
/// `default_mode: Deny` blocks commands matching no pattern. Each scenario
/// overrides the global `AgentSettings` before running the tool.
#[gpui::test]
async fn test_terminal_tool_permission_rules(cx: &mut TestAppContext) {
    init_test(cx);

    let fs = FakeFs::new(cx.executor());
    fs.insert_tree("/root", json!({})).await;
    let project = Project::test(fs, ["/root".as_ref()], cx).await;

    // Test 1: Deny rule blocks command
    {
        // Never-exiting handle: the command should be blocked before any
        // terminal is actually used.
        let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
        let environment = Rc::new(FakeThreadEnvironment {
            handle: handle.clone(),
        });

        cx.update(|cx| {
            let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
            settings.tool_permissions.tools.insert(
                "terminal".into(),
                agent_settings::ToolRules {
                    default_mode: settings::ToolPermissionMode::Confirm,
                    always_allow: vec![],
                    always_deny: vec![
                        agent_settings::CompiledRegex::new(r"rm\s+-rf", false).unwrap(),
                    ],
                    always_confirm: vec![],
                    invalid_patterns: vec![],
                },
            );
            agent_settings::AgentSettings::override_global(settings, cx);
        });

        #[allow(clippy::arc_with_non_send_sync)]
        let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
        let (event_stream, _rx) = crate::ToolCallEventStream::test();

        let task = cx.update(|cx| {
            tool.run(
                crate::TerminalToolInput {
                    command: "rm -rf /".to_string(),
                    cd: ".".to_string(),
                    timeout_ms: None,
                },
                event_stream,
                cx,
            )
        });

        let result = task.await;
        assert!(
            result.is_err(),
            "expected command to be blocked by deny rule"
        );
        assert!(
            result.unwrap_err().to_string().contains("blocked"),
            "error should mention the command was blocked"
        );
    }

    // Test 2: Allow rule skips confirmation (and overrides default_mode: Deny)
    {
        let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_with_immediate_exit(cx, 0)));
        let environment = Rc::new(FakeThreadEnvironment {
            handle: handle.clone(),
        });

        cx.update(|cx| {
            let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
            settings.always_allow_tool_actions = false;
            settings.tool_permissions.tools.insert(
                "terminal".into(),
                agent_settings::ToolRules {
                    default_mode: settings::ToolPermissionMode::Deny,
                    always_allow: vec![
                        agent_settings::CompiledRegex::new(r"^echo\s", false).unwrap(),
                    ],
                    always_deny: vec![],
                    always_confirm: vec![],
                    invalid_patterns: vec![],
                },
            );
            agent_settings::AgentSettings::override_global(settings, cx);
        });

        #[allow(clippy::arc_with_non_send_sync)]
        let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
        let (event_stream, mut rx) = crate::ToolCallEventStream::test();

        let task = cx.update(|cx| {
            tool.run(
                crate::TerminalToolInput {
                    command: "echo hello".to_string(),
                    cd: ".".to_string(),
                    timeout_ms: None,
                },
                event_stream,
                cx,
            )
        });

        // Terminal content arriving without an authorization request proves
        // the allow rule bypassed confirmation.
        let update = rx.expect_update_fields().await;
        assert!(
            update.content.iter().any(|blocks| {
                blocks
                    .iter()
                    .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
            }),
            "expected terminal content (allow rule should skip confirmation and override default deny)"
        );

        let result = task.await;
        assert!(
            result.is_ok(),
            "expected command to succeed without confirmation"
        );
    }

    // Test 3: Confirm rule forces confirmation even with always_allow_tool_actions=true
    {
        let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_with_immediate_exit(cx, 0)));
        let environment = Rc::new(FakeThreadEnvironment {
            handle: handle.clone(),
        });

        cx.update(|cx| {
            let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
            settings.always_allow_tool_actions = true;
            settings.tool_permissions.tools.insert(
                "terminal".into(),
                agent_settings::ToolRules {
                    default_mode: settings::ToolPermissionMode::Allow,
                    always_allow: vec![],
                    always_deny: vec![],
                    always_confirm: vec![
                        agent_settings::CompiledRegex::new(r"sudo", false).unwrap(),
                    ],
                    invalid_patterns: vec![],
                },
            );
            agent_settings::AgentSettings::override_global(settings, cx);
        });

        #[allow(clippy::arc_with_non_send_sync)]
        let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
        let (event_stream, mut rx) = crate::ToolCallEventStream::test();

        // Task is intentionally dropped unawaited: the test only needs to
        // observe the authorization request.
        let _task = cx.update(|cx| {
            tool.run(
                crate::TerminalToolInput {
                    command: "sudo rm file".to_string(),
                    cd: ".".to_string(),
                    timeout_ms: None,
                },
                event_stream,
                cx,
            )
        });

        let auth = rx.expect_authorization().await;
        assert!(
            auth.tool_call.fields.title.is_some(),
            "expected authorization request for sudo command despite always_allow_tool_actions=true"
        );
    }

    // Test 4: default_mode: Deny blocks commands when no pattern matches
    {
        let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
        let environment = Rc::new(FakeThreadEnvironment {
            handle: handle.clone(),
        });

        cx.update(|cx| {
            let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
            settings.always_allow_tool_actions = true;
            settings.tool_permissions.tools.insert(
                "terminal".into(),
                agent_settings::ToolRules {
                    default_mode: settings::ToolPermissionMode::Deny,
                    always_allow: vec![],
                    always_deny: vec![],
                    always_confirm: vec![],
                    invalid_patterns: vec![],
                },
            );
            agent_settings::AgentSettings::override_global(settings, cx);
        });

        #[allow(clippy::arc_with_non_send_sync)]
        let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
        let (event_stream, _rx) = crate::ToolCallEventStream::test();

        let task = cx.update(|cx| {
            tool.run(
                crate::TerminalToolInput {
                    command: "echo hello".to_string(),
                    cd: ".".to_string(),
                    timeout_ms: None,
                },
                event_stream,
                cx,
            )
        });

        let result = task.await;
        assert!(
            result.is_err(),
            "expected command to be blocked by default_mode: Deny"
        );
        assert!(
            result.unwrap_err().to_string().contains("disabled"),
            "error should mention the tool is disabled"
        );
    }
}