1use super::*;
2use acp_thread::{AgentConnection, AgentModelGroupName, AgentModelList, UserMessageId};
3use agent_client_protocol::{self as acp};
4use agent_settings::AgentProfileId;
5use anyhow::Result;
6use client::{Client, UserStore};
7use cloud_llm_client::CompletionIntent;
8use collections::IndexMap;
9use context_server::{ContextServer, ContextServerCommand, ContextServerId};
10use fs::{FakeFs, Fs};
11use futures::{
12 FutureExt as _, StreamExt,
13 channel::{
14 mpsc::{self, UnboundedReceiver},
15 oneshot,
16 },
17 future::{Fuse, Shared},
18};
19use gpui::{
20 App, AppContext, AsyncApp, Entity, Task, TestAppContext, UpdateGlobal,
21 http_client::FakeHttpClient,
22};
23use indoc::indoc;
24use language_model::{
25 LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent, LanguageModelId,
26 LanguageModelProviderName, LanguageModelRegistry, LanguageModelRequest,
27 LanguageModelRequestMessage, LanguageModelToolResult, LanguageModelToolSchemaFormat,
28 LanguageModelToolUse, MessageContent, Role, StopReason, fake_provider::FakeLanguageModel,
29};
30use pretty_assertions::assert_eq;
31use project::{
32 Project, context_server_store::ContextServerStore, project_settings::ProjectSettings,
33};
34use prompt_store::ProjectContext;
35use reqwest_client::ReqwestClient;
36use schemars::JsonSchema;
37use serde::{Deserialize, Serialize};
38use serde_json::json;
39use settings::{Settings, SettingsStore};
40use std::{
41 path::Path,
42 pin::Pin,
43 rc::Rc,
44 sync::{
45 Arc,
46 atomic::{AtomicBool, Ordering},
47 },
48 time::Duration,
49};
50use util::path;
51
52mod test_tools;
53use test_tools::*;
54
55fn init_test(cx: &mut TestAppContext) {
56 cx.update(|cx| {
57 let settings_store = SettingsStore::test(cx);
58 cx.set_global(settings_store);
59 });
60}
61
/// Test double for a terminal spawned by the terminal tool. Records whether it
/// was killed and lets the test control when the terminal "exits".
struct FakeTerminalHandle {
    // Set to true once `kill` is called.
    killed: Arc<AtomicBool>,
    // Value reported verbatim by `was_stopped_by_user`.
    stopped_by_user: Arc<AtomicBool>,
    // Fires the `wait_for_exit` task; `RefCell` because `kill` only has `&self`.
    exit_sender: std::cell::RefCell<Option<futures::channel::oneshot::Sender<()>>>,
    // Shared task that resolves once the terminal exits.
    wait_for_exit: Shared<Task<acp::TerminalExitStatus>>,
    // Canned output returned by `current_output`.
    output: acp::TerminalOutputResponse,
    id: acp::TerminalId,
}
70
impl FakeTerminalHandle {
    /// Builds a handle whose `wait_for_exit` task only resolves after
    /// `signal_exit` (or `kill`, which calls it) fires the oneshot channel.
    fn new_never_exits(cx: &mut App) -> Self {
        let killed = Arc::new(AtomicBool::new(false));
        let stopped_by_user = Arc::new(AtomicBool::new(false));

        let (exit_sender, exit_receiver) = futures::channel::oneshot::channel();

        let wait_for_exit = cx
            .spawn(async move |_cx| {
                // Wait for the exit signal (sent when kill() is called)
                let _ = exit_receiver.await;
                acp::TerminalExitStatus::new()
            })
            .shared();

        Self {
            killed,
            stopped_by_user,
            exit_sender: std::cell::RefCell::new(Some(exit_sender)),
            wait_for_exit,
            output: acp::TerminalOutputResponse::new("partial output".to_string(), false),
            id: acp::TerminalId::new("fake_terminal".to_string()),
        }
    }

    /// Builds a handle whose `wait_for_exit` resolves immediately with the
    /// given exit code. The receiver half of the oneshot is dropped, so a
    /// later `signal_exit` is a harmless no-op (the send error is ignored).
    fn new_with_immediate_exit(cx: &mut App, exit_code: u32) -> Self {
        let killed = Arc::new(AtomicBool::new(false));
        let stopped_by_user = Arc::new(AtomicBool::new(false));
        let (exit_sender, _exit_receiver) = futures::channel::oneshot::channel();

        let wait_for_exit = cx
            .spawn(async move |_cx| acp::TerminalExitStatus::new().exit_code(exit_code))
            .shared();

        Self {
            killed,
            stopped_by_user,
            exit_sender: std::cell::RefCell::new(Some(exit_sender)),
            wait_for_exit,
            output: acp::TerminalOutputResponse::new("command output".to_string(), false),
            id: acp::TerminalId::new("fake_terminal".to_string()),
        }
    }

    /// Whether `kill` has been called on this handle.
    fn was_killed(&self) -> bool {
        self.killed.load(Ordering::SeqCst)
    }

    /// Sets the value that `was_stopped_by_user` will report.
    fn set_stopped_by_user(&self, stopped: bool) {
        self.stopped_by_user.store(stopped, Ordering::SeqCst);
    }

    /// Resolves `wait_for_exit`. Idempotent: the sender is taken on first use.
    fn signal_exit(&self) {
        if let Some(sender) = self.exit_sender.borrow_mut().take() {
            let _ = sender.send(());
        }
    }
}
129
impl crate::TerminalHandle for FakeTerminalHandle {
    fn id(&self, _cx: &AsyncApp) -> Result<acp::TerminalId> {
        Ok(self.id.clone())
    }

    // Always returns the canned output configured at construction time.
    fn current_output(&self, _cx: &AsyncApp) -> Result<acp::TerminalOutputResponse> {
        Ok(self.output.clone())
    }

    fn wait_for_exit(&self, _cx: &AsyncApp) -> Result<Shared<Task<acp::TerminalExitStatus>>> {
        Ok(self.wait_for_exit.clone())
    }

    // Records the kill and resolves `wait_for_exit` so callers don't hang.
    fn kill(&self, _cx: &AsyncApp) -> Result<()> {
        self.killed.store(true, Ordering::SeqCst);
        self.signal_exit();
        Ok(())
    }

    fn was_stopped_by_user(&self, _cx: &AsyncApp) -> Result<bool> {
        Ok(self.stopped_by_user.load(Ordering::SeqCst))
    }
}
153
/// Environment that hands out the same pre-built terminal handle for every
/// `create_terminal` call.
struct FakeThreadEnvironment {
    handle: Rc<FakeTerminalHandle>,
}
157
impl crate::ThreadEnvironment for FakeThreadEnvironment {
    // Ignores the command/cwd/limit entirely and returns the shared fake handle.
    fn create_terminal(
        &self,
        _command: String,
        _cwd: Option<std::path::PathBuf>,
        _output_byte_limit: Option<u64>,
        _cx: &mut AsyncApp,
    ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
        Task::ready(Ok(self.handle.clone() as Rc<dyn crate::TerminalHandle>))
    }
}
169
/// Environment that creates multiple independent terminal handles for testing concurrent terminals.
struct MultiTerminalEnvironment {
    // Every handle created so far, in creation order.
    handles: std::cell::RefCell<Vec<Rc<FakeTerminalHandle>>>,
}
174
175impl MultiTerminalEnvironment {
176 fn new() -> Self {
177 Self {
178 handles: std::cell::RefCell::new(Vec::new()),
179 }
180 }
181
182 fn handles(&self) -> Vec<Rc<FakeTerminalHandle>> {
183 self.handles.borrow().clone()
184 }
185}
186
impl crate::ThreadEnvironment for MultiTerminalEnvironment {
    // Creates a fresh never-exiting handle per call and remembers it so the
    // test can inspect all handles later via `handles()`.
    fn create_terminal(
        &self,
        _command: String,
        _cwd: Option<std::path::PathBuf>,
        _output_byte_limit: Option<u64>,
        cx: &mut AsyncApp,
    ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
        // NOTE(review): relies on `cx.update` yielding the handle directly
        // (not a Result) — confirm against the gpui version in use.
        let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
        self.handles.borrow_mut().push(handle.clone());
        Task::ready(Ok(handle as Rc<dyn crate::TerminalHandle>))
    }
}
200
201fn always_allow_tools(cx: &mut TestAppContext) {
202 cx.update(|cx| {
203 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
204 settings.always_allow_tool_actions = true;
205 agent_settings::AgentSettings::override_global(settings, cx);
206 });
207}
208
#[gpui::test]
async fn test_echo(cx: &mut TestAppContext) {
    // Drives one user turn through the fake model and checks the streamed
    // text lands in the thread as an assistant message.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Testing: Reply with 'Hello'"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    // Simulate the model streaming a reply and then ending the turn.
    fake_model.send_last_completion_stream_text_chunk("Hello");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    let events = events.collect().await;
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message().unwrap().to_markdown(),
            indoc! {"
                ## Assistant

                Hello
            "}
        )
    });
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
}
238
#[gpui::test]
async fn test_terminal_tool_timeout_kills_handle(cx: &mut TestAppContext) {
    // A 5ms timeout on a never-exiting terminal should make the tool kill the
    // handle and still report the partial output it captured.
    init_test(cx);
    always_allow_tools(cx);

    let fs = FakeFs::new(cx.executor());
    let project = Project::test(fs, [], cx).await;

    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    #[allow(clippy::arc_with_non_send_sync)]
    let tool = Arc::new(crate::TerminalTool::new(project, environment));
    let (event_stream, mut rx) = crate::ToolCallEventStream::test();

    let task = cx.update(|cx| {
        tool.run(
            crate::TerminalToolInput {
                command: "sleep 1000".to_string(),
                cd: ".".to_string(),
                timeout_ms: Some(5),
            },
            event_stream,
            cx,
        )
    });

    // The tool should first surface the terminal in a tool-call update.
    let update = rx.expect_update_fields().await;
    assert!(
        update.content.iter().any(|blocks| {
            blocks
                .iter()
                .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
        }),
        "expected tool call update to include terminal content"
    );

    let mut task_future: Pin<Box<Fuse<Task<Result<String>>>>> = Box::pin(task.fuse());

    // Poll the task manually instead of awaiting so the test executor keeps
    // making progress; give up after 500ms of wall-clock time.
    let deadline = std::time::Instant::now() + Duration::from_millis(500);
    loop {
        if let Some(result) = task_future.as_mut().now_or_never() {
            let result = result.expect("terminal tool task should complete");

            assert!(
                handle.was_killed(),
                "expected terminal handle to be killed on timeout"
            );
            assert!(
                result.contains("partial output"),
                "expected result to include terminal output, got: {result}"
            );
            return;
        }

        if std::time::Instant::now() >= deadline {
            panic!("timed out waiting for terminal tool task to complete");
        }

        cx.run_until_parked();
        cx.background_executor.timer(Duration::from_millis(1)).await;
    }
}
304
#[gpui::test]
#[ignore]
async fn test_terminal_tool_without_timeout_does_not_kill_handle(cx: &mut TestAppContext) {
    // Without a timeout the tool must leave a long-running terminal alone.
    // NOTE(review): marked #[ignore] — presumably flaky or slow; confirm why.
    init_test(cx);
    always_allow_tools(cx);

    let fs = FakeFs::new(cx.executor());
    let project = Project::test(fs, [], cx).await;

    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    #[allow(clippy::arc_with_non_send_sync)]
    let tool = Arc::new(crate::TerminalTool::new(project, environment));
    let (event_stream, mut rx) = crate::ToolCallEventStream::test();

    let _task = cx.update(|cx| {
        tool.run(
            crate::TerminalToolInput {
                command: "sleep 1000".to_string(),
                cd: ".".to_string(),
                timeout_ms: None,
            },
            event_stream,
            cx,
        )
    });

    let update = rx.expect_update_fields().await;
    assert!(
        update.content.iter().any(|blocks| {
            blocks
                .iter()
                .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
        }),
        "expected tool call update to include terminal content"
    );

    // Give the tool a moment to (incorrectly) kill the handle if it were going to.
    smol::Timer::after(Duration::from_millis(25)).await;

    assert!(
        !handle.was_killed(),
        "did not expect terminal handle to be killed without a timeout"
    );
}
352
#[gpui::test]
async fn test_thinking(cx: &mut TestAppContext) {
    // Thinking events should be rendered inside <think> tags in the markdown
    // representation of the assistant message.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.send(
                UserMessageId::new(),
                [indoc! {"
                    Testing:

                    Generate a thinking step where you just think the word 'Think',
                    and have your final answer be 'Hello'
                "}],
                cx,
            )
        })
        .unwrap();
    cx.run_until_parked();
    // Stream a thinking chunk followed by regular text, then end the turn.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Thinking {
        text: "Think".to_string(),
        signature: None,
    });
    fake_model.send_last_completion_stream_text_chunk("Hello");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    let events = events.collect().await;
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message().unwrap().to_markdown(),
            indoc! {"
                ## Assistant

                <think>Think</think>
                Hello
            "}
        )
    });
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
}
396
#[gpui::test]
async fn test_system_prompt(cx: &mut TestAppContext) {
    // The system prompt should reflect the project context (shell) and, when
    // tools are registered, include the diagnostics-fixing section.
    let ThreadTest {
        model,
        thread,
        project_context,
        ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    project_context.update(cx, |project_context, _cx| {
        project_context.shell = "test-shell".into()
    });
    thread.update(cx, |thread, _| thread.add_tool(EchoTool));
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let mut pending_completions = fake_model.pending_completions();
    assert_eq!(
        pending_completions.len(),
        1,
        "unexpected pending completions: {:?}",
        pending_completions
    );

    let pending_completion = pending_completions.pop().unwrap();
    // The system prompt is always the first message in the request.
    assert_eq!(pending_completion.messages[0].role, Role::System);

    let system_message = &pending_completion.messages[0];
    let system_prompt = system_message.content[0].to_str().unwrap();
    assert!(
        system_prompt.contains("test-shell"),
        "unexpected system message: {:?}",
        system_message
    );
    assert!(
        system_prompt.contains("## Fixing Diagnostics"),
        "unexpected system message: {:?}",
        system_message
    );
}
441
#[gpui::test]
async fn test_system_prompt_without_tools(cx: &mut TestAppContext) {
    // With no tools registered, the tool-related sections must be absent from
    // the system prompt.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let mut pending_completions = fake_model.pending_completions();
    assert_eq!(
        pending_completions.len(),
        1,
        "unexpected pending completions: {:?}",
        pending_completions
    );

    let pending_completion = pending_completions.pop().unwrap();
    assert_eq!(pending_completion.messages[0].role, Role::System);

    let system_message = &pending_completion.messages[0];
    let system_prompt = system_message.content[0].to_str().unwrap();
    assert!(
        !system_prompt.contains("## Tool Use"),
        "unexpected system message: {:?}",
        system_message
    );
    assert!(
        !system_prompt.contains("## Fixing Diagnostics"),
        "unexpected system message: {:?}",
        system_message
    );
}
477
#[gpui::test]
async fn test_prompt_caching(cx: &mut TestAppContext) {
    // Verifies that exactly one message per request carries `cache: true`, and
    // that it is always the most recent user (or tool-result) message.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Send initial user message and verify it's cached
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let completion = fake_model.pending_completions().pop().unwrap();
    // messages[0] is the system prompt; only the conversation is compared.
    assert_eq!(
        completion.messages[1..],
        vec![LanguageModelRequestMessage {
            role: Role::User,
            content: vec!["Message 1".into()],
            cache: true,
            reasoning_details: None,
        }]
    );
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
        "Response to Message 1".into(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Send another user message and verify only the latest is cached
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 2".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
        "Response to Message 2".into(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Simulate a tool call and verify that the latest tool result is cached
    thread.update(cx, |thread, _| thread.add_tool(EchoTool));
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Use the echo tool"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use = LanguageModelToolUse {
        id: "tool_1".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    let completion = fake_model.pending_completions().pop().unwrap();
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "test".into(),
        output: Some("test".into()),
    };
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 2".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 2".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Use the echo tool".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
}
623
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_basic_tool_calls(cx: &mut TestAppContext) {
    // End-to-end test against a real model (only runs with the `e2e` feature).
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // Test a tool call that's likely to complete *before* streaming stops.
    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(
                UserMessageId::new(),
                ["Now test the echo tool with 'Hello'. Does it work? Say 'Yes' or 'No'."],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);

    // Test a tool call that's likely to complete *after* streaming stops.
    let events = thread
        .update(cx, |thread, cx| {
            thread.remove_tool(&EchoTool::name());
            thread.add_tool(DelayTool);
            thread.send(
                UserMessageId::new(),
                [
                    "Now call the delay tool with 200ms.",
                    "When the timer goes off, then you echo the output of the tool.",
                ],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
    thread.update(cx, |thread, _cx| {
        // The delay tool outputs "Ding"; the model should echo it back.
        assert!(
            thread
                .last_message()
                .unwrap()
                .as_agent_message()
                .unwrap()
                .content
                .iter()
                .any(|content| {
                    if let AgentMessageContent::Text(text) = content {
                        text.contains("Ding")
                    } else {
                        false
                    }
                }),
            "{}",
            thread.to_markdown()
        );
    });
}
683
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_streaming_tool_calls(cx: &mut TestAppContext) {
    // Verifies tool-use input streams incrementally: at some point the thread
    // should hold a tool use whose input is still incomplete.
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // Test a tool call that's likely to complete *before* streaming stops.
    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(WordListTool);
            thread.send(UserMessageId::new(), ["Test the word_list tool."], cx)
        })
        .unwrap();

    let mut saw_partial_tool_use = false;
    while let Some(event) = events.next().await {
        if let Ok(ThreadEvent::ToolCall(tool_call)) = event {
            thread.update(cx, |thread, _cx| {
                // Look for a tool use in the thread's last message
                let message = thread.last_message().unwrap();
                let agent_message = message.as_agent_message().unwrap();
                let last_content = agent_message.content.last().unwrap();
                if let AgentMessageContent::ToolUse(last_tool_use) = last_content {
                    assert_eq!(last_tool_use.name.as_ref(), "word_list");
                    if tool_call.status == acp::ToolCallStatus::Pending {
                        // While pending, the later keys ("g") may not have
                        // streamed yet — that's the partial state we want.
                        if !last_tool_use.is_input_complete
                            && last_tool_use.input.get("g").is_none()
                        {
                            saw_partial_tool_use = true;
                        }
                    } else {
                        last_tool_use
                            .input
                            .get("a")
                            .expect("'a' has streamed because input is now complete");
                        last_tool_use
                            .input
                            .get("g")
                            .expect("'g' has streamed because input is now complete");
                    }
                } else {
                    panic!("last content should be a tool use");
                }
            });
        }
    }

    assert!(
        saw_partial_tool_use,
        "should see at least one partially streamed tool use in the history"
    );
}
735
736#[gpui::test]
737async fn test_tool_authorization(cx: &mut TestAppContext) {
738 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
739 let fake_model = model.as_fake();
740
741 let mut events = thread
742 .update(cx, |thread, cx| {
743 thread.add_tool(ToolRequiringPermission);
744 thread.send(UserMessageId::new(), ["abc"], cx)
745 })
746 .unwrap();
747 cx.run_until_parked();
748 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
749 LanguageModelToolUse {
750 id: "tool_id_1".into(),
751 name: ToolRequiringPermission::name().into(),
752 raw_input: "{}".into(),
753 input: json!({}),
754 is_input_complete: true,
755 thought_signature: None,
756 },
757 ));
758 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
759 LanguageModelToolUse {
760 id: "tool_id_2".into(),
761 name: ToolRequiringPermission::name().into(),
762 raw_input: "{}".into(),
763 input: json!({}),
764 is_input_complete: true,
765 thought_signature: None,
766 },
767 ));
768 fake_model.end_last_completion_stream();
769 let tool_call_auth_1 = next_tool_call_authorization(&mut events).await;
770 let tool_call_auth_2 = next_tool_call_authorization(&mut events).await;
771
772 // Approve the first
773 tool_call_auth_1
774 .response
775 .send(tool_call_auth_1.options[1].option_id.clone())
776 .unwrap();
777 cx.run_until_parked();
778
779 // Reject the second
780 tool_call_auth_2
781 .response
782 .send(tool_call_auth_1.options[2].option_id.clone())
783 .unwrap();
784 cx.run_until_parked();
785
786 let completion = fake_model.pending_completions().pop().unwrap();
787 let message = completion.messages.last().unwrap();
788 assert_eq!(
789 message.content,
790 vec![
791 language_model::MessageContent::ToolResult(LanguageModelToolResult {
792 tool_use_id: tool_call_auth_1.tool_call.tool_call_id.0.to_string().into(),
793 tool_name: ToolRequiringPermission::name().into(),
794 is_error: false,
795 content: "Allowed".into(),
796 output: Some("Allowed".into())
797 }),
798 language_model::MessageContent::ToolResult(LanguageModelToolResult {
799 tool_use_id: tool_call_auth_2.tool_call.tool_call_id.0.to_string().into(),
800 tool_name: ToolRequiringPermission::name().into(),
801 is_error: true,
802 content: "Permission to run tool denied by user".into(),
803 output: Some("Permission to run tool denied by user".into())
804 })
805 ]
806 );
807
808 // Simulate yet another tool call.
809 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
810 LanguageModelToolUse {
811 id: "tool_id_3".into(),
812 name: ToolRequiringPermission::name().into(),
813 raw_input: "{}".into(),
814 input: json!({}),
815 is_input_complete: true,
816 thought_signature: None,
817 },
818 ));
819 fake_model.end_last_completion_stream();
820
821 // Respond by always allowing tools.
822 let tool_call_auth_3 = next_tool_call_authorization(&mut events).await;
823 tool_call_auth_3
824 .response
825 .send(tool_call_auth_3.options[0].option_id.clone())
826 .unwrap();
827 cx.run_until_parked();
828 let completion = fake_model.pending_completions().pop().unwrap();
829 let message = completion.messages.last().unwrap();
830 assert_eq!(
831 message.content,
832 vec![language_model::MessageContent::ToolResult(
833 LanguageModelToolResult {
834 tool_use_id: tool_call_auth_3.tool_call.tool_call_id.0.to_string().into(),
835 tool_name: ToolRequiringPermission::name().into(),
836 is_error: false,
837 content: "Allowed".into(),
838 output: Some("Allowed".into())
839 }
840 )]
841 );
842
843 // Simulate a final tool call, ensuring we don't trigger authorization.
844 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
845 LanguageModelToolUse {
846 id: "tool_id_4".into(),
847 name: ToolRequiringPermission::name().into(),
848 raw_input: "{}".into(),
849 input: json!({}),
850 is_input_complete: true,
851 thought_signature: None,
852 },
853 ));
854 fake_model.end_last_completion_stream();
855 cx.run_until_parked();
856 let completion = fake_model.pending_completions().pop().unwrap();
857 let message = completion.messages.last().unwrap();
858 assert_eq!(
859 message.content,
860 vec![language_model::MessageContent::ToolResult(
861 LanguageModelToolResult {
862 tool_use_id: "tool_id_4".into(),
863 tool_name: ToolRequiringPermission::name().into(),
864 is_error: false,
865 content: "Allowed".into(),
866 output: Some("Allowed".into())
867 }
868 )]
869 );
870}
871
#[gpui::test]
async fn test_tool_hallucination(cx: &mut TestAppContext) {
    // A tool-use event naming a tool that was never registered should surface
    // as a tool call that immediately transitions to Failed.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_id_1".into(),
            name: "nonexistent_tool".into(),
            raw_input: "{}".into(),
            input: json!({}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    let tool_call = expect_tool_call(&mut events).await;
    assert_eq!(tool_call.title, "nonexistent_tool");
    assert_eq!(tool_call.status, acp::ToolCallStatus::Pending);
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(update.fields.status, Some(acp::ToolCallStatus::Failed));
}
901
#[gpui::test]
async fn test_resume_after_tool_use_limit(cx: &mut TestAppContext) {
    // After hitting the tool-use limit, `resume` should replay the history and
    // append a "Continue where you left off" user message (the only cached one).
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: EchoTool::name().into(),
        raw_input: "{}".into(),
        input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.end_last_completion_stream();

    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_id_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "def".into(),
        output: Some("def".into()),
    };
    // messages[0] is the system prompt; compare only the conversation.
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use.clone())],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result.clone())],
                cache: true,
                reasoning_details: None,
            },
        ]
    );

    // Simulate reaching tool use limit.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
    fake_model.end_last_completion_stream();
    let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
    assert!(
        last_event
            .unwrap_err()
            .is::<language_model::ToolUseLimitReachedError>()
    );

    let events = thread.update(cx, |thread, cx| thread.resume(cx)).unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Continue where you left off".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );

    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text("Done".into()));
    fake_model.end_last_completion_stream();
    events.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message().unwrap().to_markdown(),
            indoc! {"
                ## Assistant

                Done
            "}
        )
    });
}
1016
#[gpui::test]
async fn test_send_after_tool_use_limit(cx: &mut TestAppContext) {
    // After a tool-use-limit error, a fresh `send` should still include the
    // prior tool use/result in the request, with only the new message cached.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: EchoTool::name().into(),
        raw_input: "{}".into(),
        input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
        is_input_complete: true,
        thought_signature: None,
    };
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_id_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "def".into(),
        output: Some("def".into()),
    };
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
    fake_model.end_last_completion_stream();
    // The stream should terminate with a ToolUseLimitReachedError.
    let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
    assert!(
        last_event
            .unwrap_err()
            .is::<language_model::ToolUseLimitReachedError>()
    );

    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), vec!["ghi"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["ghi".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
}
1093
1094async fn expect_tool_call(events: &mut UnboundedReceiver<Result<ThreadEvent>>) -> acp::ToolCall {
1095 let event = events
1096 .next()
1097 .await
1098 .expect("no tool call authorization event received")
1099 .unwrap();
1100 match event {
1101 ThreadEvent::ToolCall(tool_call) => tool_call,
1102 event => {
1103 panic!("Unexpected event {event:?}");
1104 }
1105 }
1106}
1107
1108async fn expect_tool_call_update_fields(
1109 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1110) -> acp::ToolCallUpdate {
1111 let event = events
1112 .next()
1113 .await
1114 .expect("no tool call authorization event received")
1115 .unwrap();
1116 match event {
1117 ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(update)) => update,
1118 event => {
1119 panic!("Unexpected event {event:?}");
1120 }
1121 }
1122}
1123
1124async fn next_tool_call_authorization(
1125 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1126) -> ToolCallAuthorization {
1127 loop {
1128 let event = events
1129 .next()
1130 .await
1131 .expect("no tool call authorization event received")
1132 .unwrap();
1133 if let ThreadEvent::ToolCallAuthorization(tool_call_authorization) = event {
1134 let permission_kinds = tool_call_authorization
1135 .options
1136 .iter()
1137 .map(|o| o.kind)
1138 .collect::<Vec<_>>();
1139 assert_eq!(
1140 permission_kinds,
1141 vec![
1142 acp::PermissionOptionKind::AllowAlways,
1143 acp::PermissionOptionKind::AllowOnce,
1144 acp::PermissionOptionKind::RejectOnce,
1145 ]
1146 );
1147 return tool_call_authorization;
1148 }
1149 }
1150}
1151
1152#[gpui::test]
1153#[cfg_attr(not(feature = "e2e"), ignore)]
1154async fn test_concurrent_tool_calls(cx: &mut TestAppContext) {
1155 let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;
1156
1157 // Test concurrent tool calls with different delay times
1158 let events = thread
1159 .update(cx, |thread, cx| {
1160 thread.add_tool(DelayTool);
1161 thread.send(
1162 UserMessageId::new(),
1163 [
1164 "Call the delay tool twice in the same message.",
1165 "Once with 100ms. Once with 300ms.",
1166 "When both timers are complete, describe the outputs.",
1167 ],
1168 cx,
1169 )
1170 })
1171 .unwrap()
1172 .collect()
1173 .await;
1174
1175 let stop_reasons = stop_events(events);
1176 assert_eq!(stop_reasons, vec![acp::StopReason::EndTurn]);
1177
1178 thread.update(cx, |thread, _cx| {
1179 let last_message = thread.last_message().unwrap();
1180 let agent_message = last_message.as_agent_message().unwrap();
1181 let text = agent_message
1182 .content
1183 .iter()
1184 .filter_map(|content| {
1185 if let AgentMessageContent::Text(text) = content {
1186 Some(text.as_str())
1187 } else {
1188 None
1189 }
1190 })
1191 .collect::<String>();
1192
1193 assert!(text.contains("Ding"));
1194 });
1195}
1196
1197#[gpui::test]
1198async fn test_profiles(cx: &mut TestAppContext) {
1199 let ThreadTest {
1200 model, thread, fs, ..
1201 } = setup(cx, TestModel::Fake).await;
1202 let fake_model = model.as_fake();
1203
1204 thread.update(cx, |thread, _cx| {
1205 thread.add_tool(DelayTool);
1206 thread.add_tool(EchoTool);
1207 thread.add_tool(InfiniteTool);
1208 });
1209
1210 // Override profiles and wait for settings to be loaded.
1211 fs.insert_file(
1212 paths::settings_file(),
1213 json!({
1214 "agent": {
1215 "profiles": {
1216 "test-1": {
1217 "name": "Test Profile 1",
1218 "tools": {
1219 EchoTool::name(): true,
1220 DelayTool::name(): true,
1221 }
1222 },
1223 "test-2": {
1224 "name": "Test Profile 2",
1225 "tools": {
1226 InfiniteTool::name(): true,
1227 }
1228 }
1229 }
1230 }
1231 })
1232 .to_string()
1233 .into_bytes(),
1234 )
1235 .await;
1236 cx.run_until_parked();
1237
1238 // Test that test-1 profile (default) has echo and delay tools
1239 thread
1240 .update(cx, |thread, cx| {
1241 thread.set_profile(AgentProfileId("test-1".into()), cx);
1242 thread.send(UserMessageId::new(), ["test"], cx)
1243 })
1244 .unwrap();
1245 cx.run_until_parked();
1246
1247 let mut pending_completions = fake_model.pending_completions();
1248 assert_eq!(pending_completions.len(), 1);
1249 let completion = pending_completions.pop().unwrap();
1250 let tool_names: Vec<String> = completion
1251 .tools
1252 .iter()
1253 .map(|tool| tool.name.clone())
1254 .collect();
1255 assert_eq!(tool_names, vec![DelayTool::name(), EchoTool::name()]);
1256 fake_model.end_last_completion_stream();
1257
1258 // Switch to test-2 profile, and verify that it has only the infinite tool.
1259 thread
1260 .update(cx, |thread, cx| {
1261 thread.set_profile(AgentProfileId("test-2".into()), cx);
1262 thread.send(UserMessageId::new(), ["test2"], cx)
1263 })
1264 .unwrap();
1265 cx.run_until_parked();
1266 let mut pending_completions = fake_model.pending_completions();
1267 assert_eq!(pending_completions.len(), 1);
1268 let completion = pending_completions.pop().unwrap();
1269 let tool_names: Vec<String> = completion
1270 .tools
1271 .iter()
1272 .map(|tool| tool.name.clone())
1273 .collect();
1274 assert_eq!(tool_names, vec![InfiniteTool::name()]);
1275}
1276
1277#[gpui::test]
1278async fn test_mcp_tools(cx: &mut TestAppContext) {
1279 let ThreadTest {
1280 model,
1281 thread,
1282 context_server_store,
1283 fs,
1284 ..
1285 } = setup(cx, TestModel::Fake).await;
1286 let fake_model = model.as_fake();
1287
1288 // Override profiles and wait for settings to be loaded.
1289 fs.insert_file(
1290 paths::settings_file(),
1291 json!({
1292 "agent": {
1293 "always_allow_tool_actions": true,
1294 "profiles": {
1295 "test": {
1296 "name": "Test Profile",
1297 "enable_all_context_servers": true,
1298 "tools": {
1299 EchoTool::name(): true,
1300 }
1301 },
1302 }
1303 }
1304 })
1305 .to_string()
1306 .into_bytes(),
1307 )
1308 .await;
1309 cx.run_until_parked();
1310 thread.update(cx, |thread, cx| {
1311 thread.set_profile(AgentProfileId("test".into()), cx)
1312 });
1313
1314 let mut mcp_tool_calls = setup_context_server(
1315 "test_server",
1316 vec![context_server::types::Tool {
1317 name: "echo".into(),
1318 description: None,
1319 input_schema: serde_json::to_value(EchoTool::input_schema(
1320 LanguageModelToolSchemaFormat::JsonSchema,
1321 ))
1322 .unwrap(),
1323 output_schema: None,
1324 annotations: None,
1325 }],
1326 &context_server_store,
1327 cx,
1328 );
1329
1330 let events = thread.update(cx, |thread, cx| {
1331 thread.send(UserMessageId::new(), ["Hey"], cx).unwrap()
1332 });
1333 cx.run_until_parked();
1334
1335 // Simulate the model calling the MCP tool.
1336 let completion = fake_model.pending_completions().pop().unwrap();
1337 assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
1338 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1339 LanguageModelToolUse {
1340 id: "tool_1".into(),
1341 name: "echo".into(),
1342 raw_input: json!({"text": "test"}).to_string(),
1343 input: json!({"text": "test"}),
1344 is_input_complete: true,
1345 thought_signature: None,
1346 },
1347 ));
1348 fake_model.end_last_completion_stream();
1349 cx.run_until_parked();
1350
1351 let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
1352 assert_eq!(tool_call_params.name, "echo");
1353 assert_eq!(tool_call_params.arguments, Some(json!({"text": "test"})));
1354 tool_call_response
1355 .send(context_server::types::CallToolResponse {
1356 content: vec![context_server::types::ToolResponseContent::Text {
1357 text: "test".into(),
1358 }],
1359 is_error: None,
1360 meta: None,
1361 structured_content: None,
1362 })
1363 .unwrap();
1364 cx.run_until_parked();
1365
1366 assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
1367 fake_model.send_last_completion_stream_text_chunk("Done!");
1368 fake_model.end_last_completion_stream();
1369 events.collect::<Vec<_>>().await;
1370
1371 // Send again after adding the echo tool, ensuring the name collision is resolved.
1372 let events = thread.update(cx, |thread, cx| {
1373 thread.add_tool(EchoTool);
1374 thread.send(UserMessageId::new(), ["Go"], cx).unwrap()
1375 });
1376 cx.run_until_parked();
1377 let completion = fake_model.pending_completions().pop().unwrap();
1378 assert_eq!(
1379 tool_names_for_completion(&completion),
1380 vec!["echo", "test_server_echo"]
1381 );
1382 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1383 LanguageModelToolUse {
1384 id: "tool_2".into(),
1385 name: "test_server_echo".into(),
1386 raw_input: json!({"text": "mcp"}).to_string(),
1387 input: json!({"text": "mcp"}),
1388 is_input_complete: true,
1389 thought_signature: None,
1390 },
1391 ));
1392 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
1393 LanguageModelToolUse {
1394 id: "tool_3".into(),
1395 name: "echo".into(),
1396 raw_input: json!({"text": "native"}).to_string(),
1397 input: json!({"text": "native"}),
1398 is_input_complete: true,
1399 thought_signature: None,
1400 },
1401 ));
1402 fake_model.end_last_completion_stream();
1403 cx.run_until_parked();
1404
1405 let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
1406 assert_eq!(tool_call_params.name, "echo");
1407 assert_eq!(tool_call_params.arguments, Some(json!({"text": "mcp"})));
1408 tool_call_response
1409 .send(context_server::types::CallToolResponse {
1410 content: vec![context_server::types::ToolResponseContent::Text { text: "mcp".into() }],
1411 is_error: None,
1412 meta: None,
1413 structured_content: None,
1414 })
1415 .unwrap();
1416 cx.run_until_parked();
1417
1418 // Ensure the tool results were inserted with the correct names.
1419 let completion = fake_model.pending_completions().pop().unwrap();
1420 assert_eq!(
1421 completion.messages.last().unwrap().content,
1422 vec![
1423 MessageContent::ToolResult(LanguageModelToolResult {
1424 tool_use_id: "tool_3".into(),
1425 tool_name: "echo".into(),
1426 is_error: false,
1427 content: "native".into(),
1428 output: Some("native".into()),
1429 },),
1430 MessageContent::ToolResult(LanguageModelToolResult {
1431 tool_use_id: "tool_2".into(),
1432 tool_name: "test_server_echo".into(),
1433 is_error: false,
1434 content: "mcp".into(),
1435 output: Some("mcp".into()),
1436 },),
1437 ]
1438 );
1439 fake_model.end_last_completion_stream();
1440 events.collect::<Vec<_>>().await;
1441}
1442
/// Verifies how tool names are namespaced and truncated when several context
/// servers expose tools whose names collide with each other or with native
/// tools, including names at, just under, and over `MAX_TOOL_NAME_LENGTH`.
#[gpui::test]
async fn test_mcp_tool_truncation(cx: &mut TestAppContext) {
    let ThreadTest {
        model,
        thread,
        context_server_store,
        fs,
        ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Set up a profile with all tools enabled
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "profiles": {
                    "test": {
                        "name": "Test Profile",
                        "enable_all_context_servers": true,
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                            WordListTool::name(): true,
                            ToolRequiringPermission::name(): true,
                            InfiniteTool::name(): true,
                        }
                    },
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;
    cx.run_until_parked();

    thread.update(cx, |thread, cx| {
        thread.set_profile(AgentProfileId("test".into()), cx);
        thread.add_tool(EchoTool);
        thread.add_tool(DelayTool);
        thread.add_tool(WordListTool);
        thread.add_tool(ToolRequiringPermission);
        thread.add_tool(InfiniteTool);
    });

    // Set up multiple context servers with some overlapping tool names
    let _server1_calls = setup_context_server(
        "xxx",
        vec![
            context_server::types::Tool {
                name: "echo".into(), // Conflicts with native EchoTool
                description: None,
                input_schema: serde_json::to_value(EchoTool::input_schema(
                    LanguageModelToolSchemaFormat::JsonSchema,
                ))
                .unwrap(),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "unique_tool_1".into(),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );

    let _server2_calls = setup_context_server(
        "yyy",
        vec![
            context_server::types::Tool {
                name: "echo".into(), // Also conflicts with native EchoTool
                description: None,
                input_schema: serde_json::to_value(EchoTool::input_schema(
                    LanguageModelToolSchemaFormat::JsonSchema,
                ))
                .unwrap(),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "unique_tool_2".into(),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            // Long name that still fits once a short prefix is added.
            context_server::types::Tool {
                name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            // Name one byte under the limit; no room for any prefix.
            context_server::types::Tool {
                name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );
    // Third server duplicates the long names of "yyy" to force
    // disambiguation, and adds a name that exceeds the limit outright.
    let _server3_calls = setup_context_server(
        "zzz",
        vec![
            context_server::types::Tool {
                name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "c".repeat(MAX_TOOL_NAME_LENGTH + 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );

    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Go"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    // NOTE(review): expected names are asserted from observed behavior —
    // colliding names get a server-name (or truncated server-name initial,
    // e.g. "y_"/"z_") prefix, and over-length names are cut to fit. Confirm
    // against the truncation implementation if this assertion changes.
    assert_eq!(
        tool_names_for_completion(&completion),
        vec![
            "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
            "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
            "delay",
            "echo",
            "infinite",
            "tool_requiring_permission",
            "unique_tool_1",
            "unique_tool_2",
            "word_list",
            "xxx_echo",
            "y_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
            "yyy_echo",
            "z_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        ]
    );
}
1608
/// E2E: cancelling a turn while a tool is still running must close the event
/// stream with `StopReason::Cancelled`, and the thread must remain usable for
/// subsequent messages.
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_cancellation(cx: &mut TestAppContext) {
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(InfiniteTool);
            thread.add_tool(EchoTool);
            thread.send(
                UserMessageId::new(),
                ["Call the echo tool, then call the infinite tool, then explain their output"],
                cx,
            )
        })
        .unwrap();

    // Wait until both tools are called.
    let mut expected_tools = vec!["Echo", "Infinite Tool"];
    let mut echo_id = None;
    let mut echo_completed = false;
    while let Some(event) = events.next().await {
        match event.unwrap() {
            ThreadEvent::ToolCall(tool_call) => {
                // Tool calls must arrive in the order the prompt requested.
                assert_eq!(tool_call.title, expected_tools.remove(0));
                if tool_call.title == "Echo" {
                    echo_id = Some(tool_call.tool_call_id);
                }
            }
            // Track completion of the echo tool (the one that finishes);
            // the infinite tool is intentionally left running.
            ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
                acp::ToolCallUpdate {
                    tool_call_id,
                    fields:
                        acp::ToolCallUpdateFields {
                            status: Some(acp::ToolCallStatus::Completed),
                            ..
                        },
                    ..
                },
            )) if Some(&tool_call_id) == echo_id.as_ref() => {
                echo_completed = true;
            }
            _ => {}
        }

        if expected_tools.is_empty() && echo_completed {
            break;
        }
    }

    // Cancel the current send and ensure that the event stream is closed, even
    // if one of the tools is still running.
    thread.update(cx, |thread, cx| thread.cancel(cx)).await;
    let events = events.collect::<Vec<_>>().await;
    let last_event = events.last();
    assert!(
        matches!(
            last_event,
            Some(Ok(ThreadEvent::Stop(acp::StopReason::Cancelled)))
        ),
        "unexpected event {last_event:?}"
    );

    // Ensure we can still send a new message after cancellation.
    let events = thread
        .update(cx, |thread, cx| {
            thread.send(
                UserMessageId::new(),
                ["Testing: reply with 'Hello' then stop."],
                cx,
            )
        })
        .unwrap()
        .collect::<Vec<_>>()
        .await;
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();
        assert_eq!(
            agent_message.content,
            vec![AgentMessageContent::Text("Hello".to_string())]
        );
    });
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
}
1694
/// Cancelling a thread while a terminal tool is running must kill the
/// terminal and preserve its partial output in the tool result, rather than
/// replacing it with a generic "canceled" message.
#[gpui::test]
async fn test_terminal_tool_cancellation_captures_output(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A fake terminal whose command never exits on its own, so cancellation
    // is the only way the tool call can finish.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Cancel the thread while the terminal is running
    thread.update(cx, |thread, cx| thread.cancel(cx)).detach();

    // Collect remaining events, driving the executor to let cancellation complete
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on cancellation"
    );

    // Verify we got a cancellation stop event
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::Cancelled],
    );

    // Verify the tool result contains the terminal output, not just "Tool canceled by user"
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        // "partial output" comes from FakeTerminalHandle's output field
        assert!(
            result_text.contains("partial output"),
            "expected tool result to contain terminal output, got: {result_text}"
        );
        // Match the actual format from process_content in terminal_tool.rs
        assert!(
            result_text.contains("The user stopped this command"),
            "expected tool result to indicate user stopped, got: {result_text}"
        );
    });

    // Verify we can send a new message after cancellation
    verify_thread_recovery(&thread, &fake_model, cx).await;
}
1791
/// A tool that cooperates with cancellation (via
/// `event_stream.cancelled_by_user()`) must cause `cancel()` to resolve
/// promptly and must observe the cancellation itself.
#[gpui::test]
async fn test_cancellation_aware_tool_responds_to_cancellation(cx: &mut TestAppContext) {
    // This test verifies that tools which properly handle cancellation via
    // `event_stream.cancelled_by_user()` (like edit_file_tool) respond promptly
    // to cancellation and report that they were cancelled.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // `was_cancelled` is an observable flag the tool sets when it sees the
    // cancellation signal.
    let (tool, was_cancelled) = CancellationAwareTool::new();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(tool);
            thread.send(
                UserMessageId::new(),
                ["call the cancellation aware tool"],
                cx,
            )
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the cancellation-aware tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "cancellation_aware_1".into(),
            name: "cancellation_aware".into(),
            raw_input: r#"{}"#.into(),
            input: json!({}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    cx.run_until_parked();

    // Wait for the tool call to be reported
    // `deadline` is a bounded number of polling rounds, scaled with
    // available parallelism, not a wall-clock deadline.
    let mut tool_started = false;
    let deadline = cx.executor().num_cpus() * 100;
    for _ in 0..deadline {
        cx.run_until_parked();

        // Drain all immediately-available events without blocking.
        while let Some(Some(event)) = events.next().now_or_never() {
            if let Ok(ThreadEvent::ToolCall(tool_call)) = &event {
                if tool_call.title == "Cancellation Aware Tool" {
                    tool_started = true;
                    break;
                }
            }
        }

        if tool_started {
            break;
        }

        cx.background_executor
            .timer(Duration::from_millis(10))
            .await;
    }
    assert!(tool_started, "expected cancellation aware tool to start");

    // Cancel the thread and wait for it to complete
    let cancel_task = thread.update(cx, |thread, cx| thread.cancel(cx));

    // The cancel task should complete promptly because the tool handles cancellation
    let timeout = cx.background_executor.timer(Duration::from_secs(5));
    futures::select! {
        _ = cancel_task.fuse() => {}
        _ = timeout.fuse() => {
            panic!("cancel task timed out - tool did not respond to cancellation");
        }
    }

    // Verify the tool detected cancellation via its flag
    assert!(
        was_cancelled.load(std::sync::atomic::Ordering::SeqCst),
        "tool should have detected cancellation via event_stream.cancelled_by_user()"
    );

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify we got a cancellation stop event
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::Cancelled],
    );

    // Verify we can send a new message after cancellation
    verify_thread_recovery(&thread, &fake_model, cx).await;
}
1886
1887/// Helper to verify thread can recover after cancellation by sending a simple message.
1888async fn verify_thread_recovery(
1889 thread: &Entity<Thread>,
1890 fake_model: &FakeLanguageModel,
1891 cx: &mut TestAppContext,
1892) {
1893 let events = thread
1894 .update(cx, |thread, cx| {
1895 thread.send(
1896 UserMessageId::new(),
1897 ["Testing: reply with 'Hello' then stop."],
1898 cx,
1899 )
1900 })
1901 .unwrap();
1902 cx.run_until_parked();
1903 fake_model.send_last_completion_stream_text_chunk("Hello");
1904 fake_model
1905 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
1906 fake_model.end_last_completion_stream();
1907
1908 let events = events.collect::<Vec<_>>().await;
1909 thread.update(cx, |thread, _cx| {
1910 let message = thread.last_message().unwrap();
1911 let agent_message = message.as_agent_message().unwrap();
1912 assert_eq!(
1913 agent_message.content,
1914 vec![AgentMessageContent::Text("Hello".to_string())]
1915 );
1916 });
1917 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
1918}
1919
1920/// Waits for a terminal tool to start by watching for a ToolCallUpdate with terminal content.
1921async fn wait_for_terminal_tool_started(
1922 events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
1923 cx: &mut TestAppContext,
1924) {
1925 let deadline = cx.executor().num_cpus() * 100; // Scale with available parallelism
1926 for _ in 0..deadline {
1927 cx.run_until_parked();
1928
1929 while let Some(Some(event)) = events.next().now_or_never() {
1930 if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
1931 update,
1932 ))) = &event
1933 {
1934 if update.fields.content.as_ref().is_some_and(|content| {
1935 content
1936 .iter()
1937 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
1938 }) {
1939 return;
1940 }
1941 }
1942 }
1943
1944 cx.background_executor
1945 .timer(Duration::from_millis(10))
1946 .await;
1947 }
1948 panic!("terminal tool did not start within the expected time");
1949}
1950
1951/// Collects events until a Stop event is received, driving the executor to completion.
1952async fn collect_events_until_stop(
1953 events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
1954 cx: &mut TestAppContext,
1955) -> Vec<Result<ThreadEvent>> {
1956 let mut collected = Vec::new();
1957 let deadline = cx.executor().num_cpus() * 200;
1958
1959 for _ in 0..deadline {
1960 cx.executor().advance_clock(Duration::from_millis(10));
1961 cx.run_until_parked();
1962
1963 while let Some(Some(event)) = events.next().now_or_never() {
1964 let is_stop = matches!(&event, Ok(ThreadEvent::Stop(_)));
1965 collected.push(event);
1966 if is_stop {
1967 return collected;
1968 }
1969 }
1970 }
1971 panic!(
1972 "did not receive Stop event within the expected time; collected {} events",
1973 collected.len()
1974 );
1975}
1976
/// Truncating the thread at the message that spawned a running terminal tool
/// must kill the terminal, leave the thread empty, and keep it usable.
#[gpui::test]
async fn test_truncate_while_terminal_tool_running(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A fake terminal whose command never exits on its own.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    // Keep the message id so we can truncate at this exact message later.
    let message_id = UserMessageId::new();
    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(message_id.clone(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Truncate the thread while the terminal is running
    thread
        .update(cx, |thread, cx| thread.truncate(message_id, cx))
        .unwrap();

    // Drive the executor to let cancellation complete
    let _ = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on truncate"
    );

    // Verify the thread is empty after truncation
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            "",
            "expected thread to be empty after truncating the only message"
        );
    });

    // Verify we can send a new message after truncation
    verify_thread_recovery(&thread, &fake_model, cx).await;
}
2043
/// Cancelling a turn with several terminal tools in flight must kill every
/// running terminal, not just the first one.
#[gpui::test]
async fn test_cancel_multiple_concurrent_terminal_tools(cx: &mut TestAppContext) {
    // Tests that cancellation properly kills all running terminal tools when multiple are active.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // Environment that records every terminal handle it creates, so we can
    // inspect all of them after cancellation.
    let environment = Rc::new(MultiTerminalEnvironment::new());

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment.clone(),
            ));
            thread.send(UserMessageId::new(), ["run multiple commands"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling two terminal tools
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_2".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 2000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 2000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for both terminal tools to start by counting terminal content updates
    // `deadline` is a bounded number of polling rounds, not a wall-clock deadline.
    let mut terminals_started = 0;
    let deadline = cx.executor().num_cpus() * 100;
    for _ in 0..deadline {
        cx.run_until_parked();

        // Drain all immediately-available events; the inner `break` only
        // exits this drain loop, so the outer check below re-tests the count.
        while let Some(Some(event)) = events.next().now_or_never() {
            if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
                update,
            ))) = &event
            {
                if update.fields.content.as_ref().is_some_and(|content| {
                    content
                        .iter()
                        .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
                }) {
                    terminals_started += 1;
                    if terminals_started >= 2 {
                        break;
                    }
                }
            }
        }
        if terminals_started >= 2 {
            break;
        }

        cx.background_executor
            .timer(Duration::from_millis(10))
            .await;
    }
    assert!(
        terminals_started >= 2,
        "expected 2 terminal tools to start, got {terminals_started}"
    );

    // Cancel the thread while both terminals are running
    thread.update(cx, |thread, cx| thread.cancel(cx)).detach();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify both terminal handles were killed
    let handles = environment.handles();
    assert_eq!(
        handles.len(),
        2,
        "expected 2 terminal handles to be created"
    );
    assert!(
        handles[0].was_killed(),
        "expected first terminal handle to be killed on cancellation"
    );
    assert!(
        handles[1].was_killed(),
        "expected second terminal handle to be killed on cancellation"
    );

    // Verify we got a cancellation stop event
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::Cancelled],
    );
}
2152
#[gpui::test]
async fn test_terminal_tool_stopped_via_terminal_card_button(cx: &mut TestAppContext) {
    // Tests that clicking the stop button on the terminal card (as opposed to the main
    // cancel button) properly reports user stopped via the was_stopped_by_user path.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // Fake terminal whose command never exits on its own; completion must be
    // signaled manually via `signal_exit` below.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    // Register the terminal tool and start a turn so the model can call it.
    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Simulate user clicking stop on the terminal card itself.
    // This sets the flag and signals exit (simulating what the real UI would do).
    handle.set_stopped_by_user(true);
    handle.killed.store(true, Ordering::SeqCst);
    handle.signal_exit();

    // Wait for the tool to complete
    cx.run_until_parked();

    // The thread continues after tool completion - simulate the model ending its turn
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify we got an EndTurn (not Cancelled, since we didn't cancel the thread)
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::EndTurn],
    );

    // Verify the tool result indicates user stopped
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        // Locate the tool use recorded in the agent message...
        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        // ...and its corresponding result, keyed by tool-use id.
        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        // The user-stop path should be reflected in the text sent back to the model.
        assert!(
            result_text.contains("The user stopped this command"),
            "expected tool result to indicate user stopped, got: {result_text}"
        );
    });
}
2247
#[gpui::test]
async fn test_terminal_tool_timeout_expires(cx: &mut TestAppContext) {
    // Tests that when a timeout is configured and expires, the tool result indicates timeout.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // Fake terminal whose command never exits; only the timeout can end it.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    // Register the terminal tool and start a turn.
    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command with timeout"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool with a short timeout
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": ".", "timeout_ms": 100}"#.into(),
            input: json!({"command": "sleep 1000", "cd": ".", "timeout_ms": 100}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Advance clock past the timeout (100ms configured in the input above).
    cx.executor().advance_clock(Duration::from_millis(200));
    cx.run_until_parked();

    // The thread continues after tool completion - simulate the model ending its turn
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed due to timeout
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on timeout"
    );

    // Verify we got an EndTurn (the tool completed, just with timeout)
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::EndTurn],
    );

    // Verify the tool result indicates timeout, not user stopped
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        // Find the recorded tool use and its matching result.
        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        // Timeout and user-stop take different reporting paths; both are checked.
        assert!(
            result_text.contains("timed out"),
            "expected tool result to indicate timeout, got: {result_text}"
        );
        assert!(
            !result_text.contains("The user stopped"),
            "tool result should not mention user stopped when it timed out, got: {result_text}"
        );
    });
}
2346
2347#[gpui::test]
2348async fn test_in_progress_send_canceled_by_next_send(cx: &mut TestAppContext) {
2349 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2350 let fake_model = model.as_fake();
2351
2352 let events_1 = thread
2353 .update(cx, |thread, cx| {
2354 thread.send(UserMessageId::new(), ["Hello 1"], cx)
2355 })
2356 .unwrap();
2357 cx.run_until_parked();
2358 fake_model.send_last_completion_stream_text_chunk("Hey 1!");
2359 cx.run_until_parked();
2360
2361 let events_2 = thread
2362 .update(cx, |thread, cx| {
2363 thread.send(UserMessageId::new(), ["Hello 2"], cx)
2364 })
2365 .unwrap();
2366 cx.run_until_parked();
2367 fake_model.send_last_completion_stream_text_chunk("Hey 2!");
2368 fake_model
2369 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2370 fake_model.end_last_completion_stream();
2371
2372 let events_1 = events_1.collect::<Vec<_>>().await;
2373 assert_eq!(stop_events(events_1), vec![acp::StopReason::Cancelled]);
2374 let events_2 = events_2.collect::<Vec<_>>().await;
2375 assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
2376}
2377
2378#[gpui::test]
2379async fn test_subsequent_successful_sends_dont_cancel(cx: &mut TestAppContext) {
2380 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2381 let fake_model = model.as_fake();
2382
2383 let events_1 = thread
2384 .update(cx, |thread, cx| {
2385 thread.send(UserMessageId::new(), ["Hello 1"], cx)
2386 })
2387 .unwrap();
2388 cx.run_until_parked();
2389 fake_model.send_last_completion_stream_text_chunk("Hey 1!");
2390 fake_model
2391 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2392 fake_model.end_last_completion_stream();
2393 let events_1 = events_1.collect::<Vec<_>>().await;
2394
2395 let events_2 = thread
2396 .update(cx, |thread, cx| {
2397 thread.send(UserMessageId::new(), ["Hello 2"], cx)
2398 })
2399 .unwrap();
2400 cx.run_until_parked();
2401 fake_model.send_last_completion_stream_text_chunk("Hey 2!");
2402 fake_model
2403 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
2404 fake_model.end_last_completion_stream();
2405 let events_2 = events_2.collect::<Vec<_>>().await;
2406
2407 assert_eq!(stop_events(events_1), vec![acp::StopReason::EndTurn]);
2408 assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
2409}
2410
2411#[gpui::test]
2412async fn test_refusal(cx: &mut TestAppContext) {
2413 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
2414 let fake_model = model.as_fake();
2415
2416 let events = thread
2417 .update(cx, |thread, cx| {
2418 thread.send(UserMessageId::new(), ["Hello"], cx)
2419 })
2420 .unwrap();
2421 cx.run_until_parked();
2422 thread.read_with(cx, |thread, _| {
2423 assert_eq!(
2424 thread.to_markdown(),
2425 indoc! {"
2426 ## User
2427
2428 Hello
2429 "}
2430 );
2431 });
2432
2433 fake_model.send_last_completion_stream_text_chunk("Hey!");
2434 cx.run_until_parked();
2435 thread.read_with(cx, |thread, _| {
2436 assert_eq!(
2437 thread.to_markdown(),
2438 indoc! {"
2439 ## User
2440
2441 Hello
2442
2443 ## Assistant
2444
2445 Hey!
2446 "}
2447 );
2448 });
2449
2450 // If the model refuses to continue, the thread should remove all the messages after the last user message.
2451 fake_model
2452 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::Refusal));
2453 let events = events.collect::<Vec<_>>().await;
2454 assert_eq!(stop_events(events), vec![acp::StopReason::Refusal]);
2455 thread.read_with(cx, |thread, _| {
2456 assert_eq!(thread.to_markdown(), "");
2457 });
2458}
2459
#[gpui::test]
async fn test_truncate_first_message(cx: &mut TestAppContext) {
    // Truncating at the first (and only) user message should clear the whole
    // transcript and reset token usage, and the thread must remain usable for
    // subsequent sends.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Keep the id so we can truncate at exactly this message later.
    let message_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_id.clone(), ["Hello"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello
            "}
        );
        // No usage has been reported by the model yet.
        assert_eq!(thread.latest_token_usage(), None);
    });

    // Stream a response plus a usage update so there is state to wipe.
    fake_model.send_last_completion_stream_text_chunk("Hey!");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 32_000,
            output_tokens: 16_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello

                ## Assistant

                Hey!
            "}
        );
        // used_tokens reflects input + output; max_tokens comes from the fake model.
        assert_eq!(
            thread.latest_token_usage(),
            Some(acp_thread::TokenUsage {
                used_tokens: 32_000 + 16_000,
                max_tokens: 1_000_000,
                output_tokens: 16_000,
            })
        );
    });

    // Truncate at the first message: transcript and usage should both reset.
    thread
        .update(cx, |thread, cx| thread.truncate(message_id, cx))
        .unwrap();
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(thread.to_markdown(), "");
        assert_eq!(thread.latest_token_usage(), None);
    });

    // Ensure we can still send a new message after truncation.
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hi"], cx)
        })
        .unwrap();
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hi
            "}
        );
    });
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Ahoy!");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 40_000,
            output_tokens: 20_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hi

                ## Assistant

                Ahoy!
            "}
        );

        // Usage after truncation reflects only the new turn's numbers.
        assert_eq!(
            thread.latest_token_usage(),
            Some(acp_thread::TokenUsage {
                used_tokens: 40_000 + 20_000,
                max_tokens: 1_000_000,
                output_tokens: 20_000,
            })
        );
    });
}
2577
#[gpui::test]
async fn test_truncate_second_message(cx: &mut TestAppContext) {
    // Truncating at the second user message should restore the thread to its
    // state after the first exchange, including the earlier token usage.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First exchange: user message, streamed response, and a usage update.
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Message 1 response");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 32_000,
            output_tokens: 16_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Reusable check for the post-first-exchange state; asserted both before
    // the second exchange and again after truncation.
    let assert_first_message_state = |cx: &mut TestAppContext| {
        thread.clone().read_with(cx, |thread, _| {
            assert_eq!(
                thread.to_markdown(),
                indoc! {"
                    ## User

                    Message 1

                    ## Assistant

                    Message 1 response
                "}
            );

            assert_eq!(
                thread.latest_token_usage(),
                Some(acp_thread::TokenUsage {
                    used_tokens: 32_000 + 16_000,
                    max_tokens: 1_000_000,
                    output_tokens: 16_000,
                })
            );
        });
    };

    assert_first_message_state(cx);

    // Second exchange: keep the id so we can truncate at this exact message.
    let second_message_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(second_message_id.clone(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Message 2 response");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 40_000,
            output_tokens: 20_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Both exchanges are present and usage reflects the latest turn.
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Message 1

                ## Assistant

                Message 1 response

                ## User

                Message 2

                ## Assistant

                Message 2 response
            "}
        );

        assert_eq!(
            thread.latest_token_usage(),
            Some(acp_thread::TokenUsage {
                used_tokens: 40_000 + 20_000,
                max_tokens: 1_000_000,
                output_tokens: 20_000,
            })
        );
    });

    // Truncating at the second message should roll back to the first-exchange state.
    thread
        .update(cx, |thread, cx| thread.truncate(second_message_id, cx))
        .unwrap();
    cx.run_until_parked();

    assert_first_message_state(cx);
}
2688
#[gpui::test]
async fn test_title_generation(cx: &mut TestAppContext) {
    // A dedicated summarization model should generate a title after the first
    // exchange; later exchanges must not trigger another title generation.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Separate fake model used only for title summarization.
    let summary_model = Arc::new(FakeLanguageModel::default());
    thread.update(cx, |thread, cx| {
        thread.set_summarization_model(Some(summary_model.clone()), cx)
    });

    let send = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Hey!");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    // Before the summary model responds, the title is still the default.
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "New Thread"));

    // Ensure the summary model has been invoked to generate a title.
    // Chunks split mid-word to exercise streaming; only the first line of the
    // summary output becomes the title ("Goodnight Moon" is discarded).
    summary_model.send_last_completion_stream_text_chunk("Hello ");
    summary_model.send_last_completion_stream_text_chunk("world\nG");
    summary_model.send_last_completion_stream_text_chunk("oodnight Moon");
    summary_model.end_last_completion_stream();
    send.collect::<Vec<_>>().await;
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));

    // Send another message, ensuring no title is generated this time.
    let send = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello again"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey again!");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    // No new completion was requested from the summary model.
    assert_eq!(summary_model.pending_completions(), Vec::new());
    send.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));
}
2734
#[gpui::test]
async fn test_building_request_with_pending_tools(cx: &mut TestAppContext) {
    // A tool call still awaiting permission must be excluded when building the
    // next completion request; completed tool calls are included with results.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let _events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(ToolRequiringPermission);
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["Hey!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // This tool use stays pending because permission is never granted in this test.
    let permission_tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: ToolRequiringPermission::name().into(),
        raw_input: "{}".into(),
        input: json!({}),
        is_input_complete: true,
        thought_signature: None,
    };
    // The echo tool runs immediately (no permission needed) and produces a result.
    let echo_tool_use = LanguageModelToolUse {
        id: "tool_id_2".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model.send_last_completion_stream_text_chunk("Hi!");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        permission_tool_use,
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        echo_tool_use.clone(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Ensure pending tools are skipped when building a request.
    let request = thread
        .read_with(cx, |thread, cx| {
            thread.build_completion_request(CompletionIntent::EditFile, cx)
        })
        .unwrap();
    // messages[0] is skipped here — presumably the system/context message; verify
    // against build_completion_request if this assumption changes.
    assert_eq!(
        request.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Hey!".into()],
                cache: true,
                reasoning_details: None,
            },
            // Only the completed echo tool use appears; the pending permission
            // tool use is omitted entirely.
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![
                    MessageContent::Text("Hi!".into()),
                    MessageContent::ToolUse(echo_tool_use.clone())
                ],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(LanguageModelToolResult {
                    tool_use_id: echo_tool_use.id.clone(),
                    tool_name: echo_tool_use.name,
                    is_error: false,
                    content: "test".into(),
                    output: Some("test".into())
                })],
                cache: false,
                reasoning_details: None,
            },
        ],
    );
}
2814
#[gpui::test]
async fn test_agent_connection(cx: &mut TestAppContext) {
    // End-to-end exercise of the NativeAgentConnection: thread creation, model
    // listing/selection, prompting, cancellation, and session cleanup on drop.
    cx.update(settings::init);
    let templates = Templates::new();

    // Initialize language model system with test provider
    cx.update(|cx| {
        gpui_tokio::init(cx);

        // All HTTP requests 404 — nothing in this test should hit the network.
        let http_client = FakeHttpClient::with_404_response();
        let clock = Arc::new(clock::FakeSystemClock::new());
        let client = Client::new(clock, http_client, cx);
        let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
        language_model::init(client.clone(), cx);
        language_models::init(user_store, client.clone(), cx);
        LanguageModelRegistry::test(cx);
    });
    cx.executor().forbid_parking();

    // Create a project for new_thread
    let fake_fs = cx.update(|cx| fs::FakeFs::new(cx.background_executor().clone()));
    fake_fs.insert_tree(path!("/test"), json!({})).await;
    let project = Project::test(fake_fs.clone(), [Path::new("/test")], cx).await;
    let cwd = Path::new("/test");
    let thread_store = cx.new(|cx| ThreadStore::new(cx));

    // Create agent and connection
    let agent = NativeAgent::new(
        project.clone(),
        thread_store,
        templates.clone(),
        None,
        fake_fs.clone(),
        &mut cx.to_async(),
    )
    .await
    .unwrap();
    let connection = NativeAgentConnection(agent.clone());

    // Create a thread using new_thread
    let connection_rc = Rc::new(connection.clone());
    let acp_thread = cx
        .update(|cx| connection_rc.new_thread(project, cwd, cx))
        .await
        .expect("new_thread should succeed");

    // Get the session_id from the AcpThread
    let session_id = acp_thread.read_with(cx, |thread, _| thread.session_id().clone());

    // Test model_selector returns Some
    let selector_opt = connection.model_selector(&session_id);
    assert!(
        selector_opt.is_some(),
        "agent should always support ModelSelector"
    );
    let selector = selector_opt.unwrap();

    // Test list_models
    let listed_models = cx
        .update(|cx| selector.list_models(cx))
        .await
        .expect("list_models should succeed");
    let AgentModelList::Grouped(listed_models) = listed_models else {
        panic!("Unexpected model list type");
    };
    assert!(!listed_models.is_empty(), "should have at least one model");
    // The registry's test provider exposes the fake model as "fake/fake".
    assert_eq!(
        listed_models[&AgentModelGroupName("Fake".into())][0]
            .id
            .0
            .as_ref(),
        "fake/fake"
    );

    // Test selected_model returns the default
    let model = cx
        .update(|cx| selector.selected_model(cx))
        .await
        .expect("selected_model should succeed");
    let model = cx
        .update(|cx| agent.read(cx).models().model_from_id(&model.id))
        .unwrap();
    let model = model.as_fake();
    assert_eq!(model.id().0, "fake", "should return default model");

    // Prompt through the ACP thread and stream a reply from the fake model.
    let request = acp_thread.update(cx, |thread, cx| thread.send(vec!["abc".into()], cx));
    cx.run_until_parked();
    model.send_last_completion_stream_text_chunk("def");
    cx.run_until_parked();
    acp_thread.read_with(cx, |thread, cx| {
        assert_eq!(
            thread.to_markdown(cx),
            indoc! {"
                ## User

                abc

                ## Assistant

                def

            "}
        )
    });

    // Test cancel
    cx.update(|cx| connection.cancel(&session_id, cx));
    request.await.expect("prompt should fail gracefully");

    // Ensure that dropping the ACP thread causes the native thread to be
    // dropped as well.
    cx.update(|_| drop(acp_thread));
    let result = cx
        .update(|cx| {
            connection.prompt(
                Some(acp_thread::UserMessageId::new()),
                acp::PromptRequest::new(session_id.clone(), vec!["ghi".into()]),
                cx,
            )
        })
        .await;
    // Prompting a dropped session must fail with a session-not-found error.
    assert_eq!(
        result.as_ref().unwrap_err().to_string(),
        "Session not found",
        "unexpected result: {:?}",
        result
    );
}
2943
#[gpui::test]
async fn test_tool_updates_to_completion(cx: &mut TestAppContext) {
    // Verifies the full tool-call event sequence emitted to the ACP thread:
    // initial ToolCall, input-complete update, InProgress, content, Completed.
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    thread.update(cx, |thread, _cx| thread.add_tool(ThinkingTool));
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Think"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Simulate streaming partial input.
    let input = json!({});
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "1".into(),
            name: ThinkingTool::name().into(),
            raw_input: input.to_string(),
            input,
            is_input_complete: false,
            thought_signature: None,
        },
    ));

    // Input streaming completed
    let input = json!({ "content": "Thinking hard!" });
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "1".into(),
            name: "thinking".into(),
            raw_input: input.to_string(),
            input,
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // 1) The initial tool call carries title, kind, the partial raw input, and
    //    the underlying tool name in its metadata.
    let tool_call = expect_tool_call(&mut events).await;
    assert_eq!(
        tool_call,
        acp::ToolCall::new("1", "Thinking")
            .kind(acp::ToolKind::Think)
            .raw_input(json!({}))
            .meta(acp::Meta::from_iter([(
                "tool_name".into(),
                "thinking".into()
            )]))
    );
    // 2) Update once the complete input has streamed in.
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new()
                .title("Thinking")
                .kind(acp::ToolKind::Think)
                .raw_input(json!({ "content": "Thinking hard!"}))
        )
    );
    // 3) Status transitions to InProgress when the tool starts executing.
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new().status(acp::ToolCallStatus::InProgress)
        )
    );
    // 4) The tool's streamed content is forwarded.
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new().content(vec!["Thinking hard!".into()])
        )
    );
    // 5) Finally the call completes with the tool's raw output.
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new()
                .status(acp::ToolCallStatus::Completed)
                .raw_output("Finished thinking.")
        )
    );
}
3034
3035#[gpui::test]
3036async fn test_send_no_retry_on_success(cx: &mut TestAppContext) {
3037 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
3038 let fake_model = model.as_fake();
3039
3040 let mut events = thread
3041 .update(cx, |thread, cx| {
3042 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
3043 thread.send(UserMessageId::new(), ["Hello!"], cx)
3044 })
3045 .unwrap();
3046 cx.run_until_parked();
3047
3048 fake_model.send_last_completion_stream_text_chunk("Hey!");
3049 fake_model.end_last_completion_stream();
3050
3051 let mut retry_events = Vec::new();
3052 while let Some(Ok(event)) = events.next().await {
3053 match event {
3054 ThreadEvent::Retry(retry_status) => {
3055 retry_events.push(retry_status);
3056 }
3057 ThreadEvent::Stop(..) => break,
3058 _ => {}
3059 }
3060 }
3061
3062 assert_eq!(retry_events.len(), 0);
3063 thread.read_with(cx, |thread, _cx| {
3064 assert_eq!(
3065 thread.to_markdown(),
3066 indoc! {"
3067 ## User
3068
3069 Hello!
3070
3071 ## Assistant
3072
3073 Hey!
3074 "}
3075 )
3076 });
3077}
3078
3079#[gpui::test]
3080async fn test_send_retry_on_error(cx: &mut TestAppContext) {
3081 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
3082 let fake_model = model.as_fake();
3083
3084 let mut events = thread
3085 .update(cx, |thread, cx| {
3086 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
3087 thread.send(UserMessageId::new(), ["Hello!"], cx)
3088 })
3089 .unwrap();
3090 cx.run_until_parked();
3091
3092 fake_model.send_last_completion_stream_text_chunk("Hey,");
3093 fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
3094 provider: LanguageModelProviderName::new("Anthropic"),
3095 retry_after: Some(Duration::from_secs(3)),
3096 });
3097 fake_model.end_last_completion_stream();
3098
3099 cx.executor().advance_clock(Duration::from_secs(3));
3100 cx.run_until_parked();
3101
3102 fake_model.send_last_completion_stream_text_chunk("there!");
3103 fake_model.end_last_completion_stream();
3104 cx.run_until_parked();
3105
3106 let mut retry_events = Vec::new();
3107 while let Some(Ok(event)) = events.next().await {
3108 match event {
3109 ThreadEvent::Retry(retry_status) => {
3110 retry_events.push(retry_status);
3111 }
3112 ThreadEvent::Stop(..) => break,
3113 _ => {}
3114 }
3115 }
3116
3117 assert_eq!(retry_events.len(), 1);
3118 assert!(matches!(
3119 retry_events[0],
3120 acp_thread::RetryStatus { attempt: 1, .. }
3121 ));
3122 thread.read_with(cx, |thread, _cx| {
3123 assert_eq!(
3124 thread.to_markdown(),
3125 indoc! {"
3126 ## User
3127
3128 Hello!
3129
3130 ## Assistant
3131
3132 Hey,
3133
3134 [resume]
3135
3136 ## Assistant
3137
3138 there!
3139 "}
3140 )
3141 });
3142}
3143
#[gpui::test]
async fn test_send_retry_finishes_tool_calls_on_error(cx: &mut TestAppContext) {
    // If a retryable error arrives after a tool use was streamed, the tool call
    // should still be executed, and the retried request must include both the
    // tool use and its result.
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["Call the echo tool!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use_1 = LanguageModelToolUse {
        id: "tool_1".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    // Stream the tool use, then fail the completion with a retryable error.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        tool_use_1.clone(),
    ));
    fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
        provider: LanguageModelProviderName::new("Anthropic"),
        retry_after: Some(Duration::from_secs(3)),
    });
    fake_model.end_last_completion_stream();

    // After the backoff, the retried request should carry the finished tool call.
    cx.executor().advance_clock(Duration::from_secs(3));
    let completion = fake_model.pending_completions().pop().unwrap();
    // messages[0] is skipped here — presumably the system/context message.
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Call the echo tool!".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![language_model::MessageContent::ToolUse(tool_use_1.clone())],
                cache: false,
                reasoning_details: None,
            },
            // The tool ran despite the error: its result is present, and the
            // final message is the one marked for caching.
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![language_model::MessageContent::ToolResult(
                    LanguageModelToolResult {
                        tool_use_id: tool_use_1.id.clone(),
                        tool_name: tool_use_1.name.clone(),
                        is_error: false,
                        content: "test".into(),
                        output: Some("test".into())
                    }
                )],
                cache: true,
                reasoning_details: None,
            },
        ]
    );

    // Finish the retried turn and confirm the final agent message.
    fake_model.send_last_completion_stream_text_chunk("Done");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    events.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message(),
            Some(Message::Agent(AgentMessage {
                content: vec![AgentMessageContent::Text("Done".into())],
                tool_results: IndexMap::default(),
                reasoning_details: None,
            }))
        );
    })
}
3224
3225#[gpui::test]
3226async fn test_send_max_retries_exceeded(cx: &mut TestAppContext) {
3227 let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
3228 let fake_model = model.as_fake();
3229
3230 let mut events = thread
3231 .update(cx, |thread, cx| {
3232 thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
3233 thread.send(UserMessageId::new(), ["Hello!"], cx)
3234 })
3235 .unwrap();
3236 cx.run_until_parked();
3237
3238 for _ in 0..crate::thread::MAX_RETRY_ATTEMPTS + 1 {
3239 fake_model.send_last_completion_stream_error(
3240 LanguageModelCompletionError::ServerOverloaded {
3241 provider: LanguageModelProviderName::new("Anthropic"),
3242 retry_after: Some(Duration::from_secs(3)),
3243 },
3244 );
3245 fake_model.end_last_completion_stream();
3246 cx.executor().advance_clock(Duration::from_secs(3));
3247 cx.run_until_parked();
3248 }
3249
3250 let mut errors = Vec::new();
3251 let mut retry_events = Vec::new();
3252 while let Some(event) = events.next().await {
3253 match event {
3254 Ok(ThreadEvent::Retry(retry_status)) => {
3255 retry_events.push(retry_status);
3256 }
3257 Ok(ThreadEvent::Stop(..)) => break,
3258 Err(error) => errors.push(error),
3259 _ => {}
3260 }
3261 }
3262
3263 assert_eq!(
3264 retry_events.len(),
3265 crate::thread::MAX_RETRY_ATTEMPTS as usize
3266 );
3267 for i in 0..crate::thread::MAX_RETRY_ATTEMPTS as usize {
3268 assert_eq!(retry_events[i].attempt, i + 1);
3269 }
3270 assert_eq!(errors.len(), 1);
3271 let error = errors[0]
3272 .downcast_ref::<LanguageModelCompletionError>()
3273 .unwrap();
3274 assert!(matches!(
3275 error,
3276 LanguageModelCompletionError::ServerOverloaded { .. }
3277 ));
3278}
3279
3280/// Filters out the stop events for asserting against in tests
3281fn stop_events(result_events: Vec<Result<ThreadEvent>>) -> Vec<acp::StopReason> {
3282 result_events
3283 .into_iter()
3284 .filter_map(|event| match event.unwrap() {
3285 ThreadEvent::Stop(stop_reason) => Some(stop_reason),
3286 _ => None,
3287 })
3288 .collect()
3289}
3290
/// Bundle of handles returned by `setup` for driving a `Thread` in tests.
struct ThreadTest {
    // The model wired into the thread; a `FakeLanguageModel` for `TestModel::Fake`.
    model: Arc<dyn LanguageModel>,
    thread: Entity<Thread>,
    project_context: Entity<ProjectContext>,
    // Store used by tests that register fake MCP context servers.
    context_server_store: Entity<ContextServerStore>,
    fs: Arc<FakeFs>,
}
3298
/// Selects which language model a test runs against: a real Claude Sonnet 4
/// (requires provider authentication) or an in-process fake model.
enum TestModel {
    Sonnet4,
    Fake,
}
3303
3304impl TestModel {
3305 fn id(&self) -> LanguageModelId {
3306 match self {
3307 TestModel::Sonnet4 => LanguageModelId("claude-sonnet-4-latest".into()),
3308 TestModel::Fake => unreachable!(),
3309 }
3310 }
3311}
3312
/// Builds a fully wired test fixture: a fake filesystem seeded with an agent
/// settings profile that enables every test tool, a test `Project`, and a
/// `Thread` backed by either a `FakeLanguageModel` or a real Sonnet 4 model.
async fn setup(cx: &mut TestAppContext, model: TestModel) -> ThreadTest {
    // Real-model runs perform network I/O, so parking must be allowed.
    cx.executor().allow_parking();

    let fs = FakeFs::new(cx.background_executor.clone());
    fs.create_dir(paths::settings_file().parent().unwrap())
        .await
        .unwrap();
    // Seed a settings file whose "test-profile" enables all the test tools so
    // threads created below can call them without further configuration.
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "default_profile": "test-profile",
                "profiles": {
                    "test-profile": {
                        "name": "Test Profile",
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                            WordListTool::name(): true,
                            ToolRequiringPermission::name(): true,
                            InfiniteTool::name(): true,
                            CancellationAwareTool::name(): true,
                            ThinkingTool::name(): true,
                            "terminal": true,
                        }
                    }
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;

    cx.update(|cx| {
        settings::init(cx);

        // The real-model path needs the production client stack and the
        // language-model registries; the fake path needs neither.
        match model {
            TestModel::Fake => {}
            TestModel::Sonnet4 => {
                gpui_tokio::init(cx);
                let http_client = ReqwestClient::user_agent("agent tests").unwrap();
                cx.set_http_client(Arc::new(http_client));
                let client = Client::production(cx);
                let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
                language_model::init(client.clone(), cx);
                language_models::init(user_store, client.clone(), cx);
            }
        };

        // Keep the global SettingsStore in sync with later edits to the
        // settings file on the fake fs (tests mutate it to change profiles).
        watch_settings(fs.clone(), cx);
    });

    let templates = Templates::new();

    fs.insert_tree(path!("/test"), json!({})).await;
    let project = Project::test(fs.clone(), [path!("/test").as_ref()], cx).await;

    // Fake models are ready immediately; real models are resolved from the
    // registry and their provider authenticated before use.
    let model = cx
        .update(|cx| {
            if let TestModel::Fake = model {
                Task::ready(Arc::new(FakeLanguageModel::default()) as Arc<_>)
            } else {
                let model_id = model.id();
                let models = LanguageModelRegistry::read_global(cx);
                let model = models
                    .available_models(cx)
                    .find(|model| model.id() == model_id)
                    .unwrap();

                let provider = models.provider(&model.provider_id()).unwrap();
                let authenticated = provider.authenticate(cx);

                cx.spawn(async move |_cx| {
                    authenticated.await.unwrap();
                    model
                })
            }
        })
        .await;

    let project_context = cx.new(|_cx| ProjectContext::default());
    let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
    let context_server_registry =
        cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
    let thread = cx.new(|cx| {
        Thread::new(
            project,
            project_context.clone(),
            context_server_registry,
            templates,
            Some(model.clone()),
            cx,
        )
    });
    ThreadTest {
        model,
        thread,
        project_context,
        context_server_store,
        fs,
    }
}
3416
#[cfg(test)]
#[ctor::ctor]
fn init_logger() {
    // Install env_logger only when the developer opted in via RUST_LOG;
    // unconditional init would add noise to every test run.
    let logging_requested = std::env::var("RUST_LOG").is_ok();
    if logging_requested {
        env_logger::init();
    }
}
3424
/// Spawns a detached task that mirrors changes to the settings file on `fs`
/// into the global `SettingsStore`, emulating Zed's settings watcher so tests
/// can reconfigure the agent by rewriting the settings file.
fn watch_settings(fs: Arc<dyn Fs>, cx: &mut App) {
    let fs = fs.clone();
    cx.spawn({
        async move |cx| {
            let mut new_settings_content_rx = settings::watch_config_file(
                cx.background_executor(),
                fs,
                paths::settings_file().clone(),
            );

            // Apply each new settings payload; `.ok()` discards the update
            // error if the app context has already been torn down.
            while let Some(new_settings_content) = new_settings_content_rx.next().await {
                cx.update(|cx| {
                    SettingsStore::update_global(cx, |settings, cx| {
                        settings.set_user_settings(&new_settings_content, cx)
                    })
                })
                .ok();
            }
        }
    })
    .detach();
}
3447
3448fn tool_names_for_completion(completion: &LanguageModelRequest) -> Vec<String> {
3449 completion
3450 .tools
3451 .iter()
3452 .map(|tool| tool.name.clone())
3453 .collect()
3454}
3455
/// Registers a fake stdio MCP context server named `name` that advertises
/// `tools`, then starts it in `context_server_store`.
///
/// Returns a receiver yielding one `(params, response_tx)` pair per tool call
/// the server receives; tests answer a call by sending on its `response_tx`.
fn setup_context_server(
    name: &'static str,
    tools: Vec<context_server::types::Tool>,
    context_server_store: &Entity<ContextServerStore>,
    cx: &mut TestAppContext,
) -> mpsc::UnboundedReceiver<(
    context_server::types::CallToolParams,
    oneshot::Sender<context_server::types::CallToolResponse>,
)> {
    // Declare the server in project settings so the store treats it as an
    // enabled, configured stdio server (the command is never actually run —
    // the fake transport below intercepts all traffic).
    cx.update(|cx| {
        let mut settings = ProjectSettings::get_global(cx).clone();
        settings.context_servers.insert(
            name.into(),
            project::project_settings::ContextServerSettings::Stdio {
                enabled: true,
                command: ContextServerCommand {
                    path: "somebinary".into(),
                    args: Vec::new(),
                    env: None,
                    timeout: None,
                },
            },
        );
        ProjectSettings::override_global(settings, cx);
    });

    let (mcp_tool_calls_tx, mcp_tool_calls_rx) = mpsc::unbounded();
    // Fake transport answers the MCP handshake and tool listing inline, and
    // forwards CallTool requests to the test through the channel above.
    let fake_transport = context_server::test::create_fake_transport(name, cx.executor())
        .on_request::<context_server::types::requests::Initialize, _>(move |_params| async move {
            context_server::types::InitializeResponse {
                protocol_version: context_server::types::ProtocolVersion(
                    context_server::types::LATEST_PROTOCOL_VERSION.to_string(),
                ),
                server_info: context_server::types::Implementation {
                    name: name.into(),
                    version: "1.0.0".to_string(),
                },
                capabilities: context_server::types::ServerCapabilities {
                    tools: Some(context_server::types::ToolsCapabilities {
                        list_changed: Some(true),
                    }),
                    ..Default::default()
                },
                meta: None,
            }
        })
        .on_request::<context_server::types::requests::ListTools, _>(move |_params| {
            let tools = tools.clone();
            async move {
                context_server::types::ListToolsResponse {
                    tools,
                    next_cursor: None,
                    meta: None,
                }
            }
        })
        .on_request::<context_server::types::requests::CallTool, _>(move |params| {
            let mcp_tool_calls_tx = mcp_tool_calls_tx.clone();
            async move {
                // Hand the call to the test and block this request until the
                // test sends back a response.
                let (response_tx, response_rx) = oneshot::channel();
                mcp_tool_calls_tx
                    .unbounded_send((params, response_tx))
                    .unwrap();
                response_rx.await.unwrap()
            }
        });
    context_server_store.update(cx, |store, cx| {
        store.start_server(
            Arc::new(ContextServer::new(
                ContextServerId(name.into()),
                Arc::new(fake_transport),
            )),
            cx,
        );
    });
    // Let the handshake complete before returning the call receiver.
    cx.run_until_parked();
    mcp_tool_calls_rx
}
3534
/// Verifies `tokens_before_message`: each user message reports the input-token
/// count from the *previous* request's usage update, and the first message
/// always reports `None`.
#[gpui::test]
async fn test_tokens_before_message(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First message
    let message_1_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_1_id.clone(), ["First message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Before any response, tokens_before_message should return None for first message
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should have no tokens before it"
        );
    });

    // Complete first message with usage
    fake_model.send_last_completion_stream_text_chunk("Response 1");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // First message still has no tokens before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should still have no tokens before it after response"
        );
    });

    // Second message
    let message_2_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_2_id.clone(), ["Second message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Second message should have first message's input tokens before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            Some(100),
            "Second message should have 100 tokens before it (from first request)"
        );
    });

    // Complete second message
    fake_model.send_last_completion_stream_text_chunk("Response 2");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 250, // Total for this request (includes previous context)
            output_tokens: 75,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Third message
    let message_3_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_3_id.clone(), ["Third message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Third message should have second message's input tokens (250) before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_3_id),
            Some(250),
            "Third message should have 250 tokens before it (from second request)"
        );
        // Second message should still have 100
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            Some(100),
            "Second message should still have 100 tokens before it"
        );
        // First message still has none
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should still have no tokens before it"
        );
    });
}
3641
/// Verifies that truncating the thread at a message removes that message's
/// token bookkeeping: looking up the truncated message id afterwards returns
/// `None` rather than stale counts.
#[gpui::test]
async fn test_tokens_before_message_after_truncate(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Set up three messages with responses
    let message_1_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_1_id.clone(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Response 1");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    let message_2_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_2_id.clone(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Response 2");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 250,
            output_tokens: 75,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Verify initial state
    thread.read_with(cx, |thread, _| {
        assert_eq!(thread.tokens_before_message(&message_2_id), Some(100));
    });

    // Truncate at message 2 (removes message 2 and everything after)
    thread
        .update(cx, |thread, cx| thread.truncate(message_2_id.clone(), cx))
        .unwrap();
    cx.run_until_parked();

    // After truncation, message_2_id no longer exists, so lookup should return None
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            None,
            "After truncation, message 2 no longer exists"
        );
        // Message 1 still exists but has no tokens before it
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message still has no tokens before it"
        );
    });
}
3712
/// Exercises the terminal tool's per-command permission rules end to end:
/// 1. an `always_deny` pattern blocks a matching command;
/// 2. an `always_allow` pattern runs without confirmation, overriding a
///    `Deny` default mode;
/// 3. an `always_confirm` pattern forces an authorization request even when
///    `always_allow_tool_actions` is true;
/// 4. `default_mode: Deny` blocks commands that match no pattern.
///
/// Each scenario rebuilds the global `AgentSettings` before invoking the tool
/// against a fake terminal environment.
#[gpui::test]
async fn test_terminal_tool_permission_rules(cx: &mut TestAppContext) {
    init_test(cx);

    let fs = FakeFs::new(cx.executor());
    fs.insert_tree("/root", json!({})).await;
    let project = Project::test(fs, ["/root".as_ref()], cx).await;

    // Test 1: Deny rule blocks command
    {
        let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
        let environment = Rc::new(FakeThreadEnvironment {
            handle: handle.clone(),
        });

        cx.update(|cx| {
            let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
            settings.tool_permissions.tools.insert(
                "terminal".into(),
                agent_settings::ToolRules {
                    default_mode: settings::ToolPermissionMode::Confirm,
                    always_allow: vec![],
                    always_deny: vec![
                        agent_settings::CompiledRegex::new(r"rm\s+-rf", false).unwrap(),
                    ],
                    always_confirm: vec![],
                    invalid_patterns: vec![],
                },
            );
            agent_settings::AgentSettings::override_global(settings, cx);
        });

        #[allow(clippy::arc_with_non_send_sync)]
        let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
        let (event_stream, _rx) = crate::ToolCallEventStream::test();

        let task = cx.update(|cx| {
            tool.run(
                crate::TerminalToolInput {
                    command: "rm -rf /".to_string(),
                    cd: ".".to_string(),
                    timeout_ms: None,
                },
                event_stream,
                cx,
            )
        });

        let result = task.await;
        assert!(
            result.is_err(),
            "expected command to be blocked by deny rule"
        );
        assert!(
            result.unwrap_err().to_string().contains("blocked"),
            "error should mention the command was blocked"
        );
    }

    // Test 2: Allow rule skips confirmation (and overrides default_mode: Deny)
    {
        let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_with_immediate_exit(cx, 0)));
        let environment = Rc::new(FakeThreadEnvironment {
            handle: handle.clone(),
        });

        cx.update(|cx| {
            let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
            settings.always_allow_tool_actions = false;
            settings.tool_permissions.tools.insert(
                "terminal".into(),
                agent_settings::ToolRules {
                    default_mode: settings::ToolPermissionMode::Deny,
                    always_allow: vec![
                        agent_settings::CompiledRegex::new(r"^echo\s", false).unwrap(),
                    ],
                    always_deny: vec![],
                    always_confirm: vec![],
                    invalid_patterns: vec![],
                },
            );
            agent_settings::AgentSettings::override_global(settings, cx);
        });

        #[allow(clippy::arc_with_non_send_sync)]
        let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
        let (event_stream, mut rx) = crate::ToolCallEventStream::test();

        let task = cx.update(|cx| {
            tool.run(
                crate::TerminalToolInput {
                    command: "echo hello".to_string(),
                    cd: ".".to_string(),
                    timeout_ms: None,
                },
                event_stream,
                cx,
            )
        });

        // Terminal content arriving without an authorization request proves
        // the allow rule bypassed confirmation.
        let update = rx.expect_update_fields().await;
        assert!(
            update.content.iter().any(|blocks| {
                blocks
                    .iter()
                    .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
            }),
            "expected terminal content (allow rule should skip confirmation and override default deny)"
        );

        let result = task.await;
        assert!(
            result.is_ok(),
            "expected command to succeed without confirmation"
        );
    }

    // Test 3: Confirm rule forces confirmation even with always_allow_tool_actions=true
    {
        let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_with_immediate_exit(cx, 0)));
        let environment = Rc::new(FakeThreadEnvironment {
            handle: handle.clone(),
        });

        cx.update(|cx| {
            let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
            settings.always_allow_tool_actions = true;
            settings.tool_permissions.tools.insert(
                "terminal".into(),
                agent_settings::ToolRules {
                    default_mode: settings::ToolPermissionMode::Allow,
                    always_allow: vec![],
                    always_deny: vec![],
                    always_confirm: vec![
                        agent_settings::CompiledRegex::new(r"sudo", false).unwrap(),
                    ],
                    invalid_patterns: vec![],
                },
            );
            agent_settings::AgentSettings::override_global(settings, cx);
        });

        #[allow(clippy::arc_with_non_send_sync)]
        let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
        let (event_stream, mut rx) = crate::ToolCallEventStream::test();

        // Task is intentionally not awaited: the run blocks on authorization,
        // which this test never grants.
        let _task = cx.update(|cx| {
            tool.run(
                crate::TerminalToolInput {
                    command: "sudo rm file".to_string(),
                    cd: ".".to_string(),
                    timeout_ms: None,
                },
                event_stream,
                cx,
            )
        });

        let auth = rx.expect_authorization().await;
        assert!(
            auth.tool_call.fields.title.is_some(),
            "expected authorization request for sudo command despite always_allow_tool_actions=true"
        );
    }

    // Test 4: default_mode: Deny blocks commands when no pattern matches
    {
        let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
        let environment = Rc::new(FakeThreadEnvironment {
            handle: handle.clone(),
        });

        cx.update(|cx| {
            let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
            settings.always_allow_tool_actions = true;
            settings.tool_permissions.tools.insert(
                "terminal".into(),
                agent_settings::ToolRules {
                    default_mode: settings::ToolPermissionMode::Deny,
                    always_allow: vec![],
                    always_deny: vec![],
                    always_confirm: vec![],
                    invalid_patterns: vec![],
                },
            );
            agent_settings::AgentSettings::override_global(settings, cx);
        });

        #[allow(clippy::arc_with_non_send_sync)]
        let tool = Arc::new(crate::TerminalTool::new(project.clone(), environment));
        let (event_stream, _rx) = crate::ToolCallEventStream::test();

        let task = cx.update(|cx| {
            tool.run(
                crate::TerminalToolInput {
                    command: "echo hello".to_string(),
                    cd: ".".to_string(),
                    timeout_ms: None,
                },
                event_stream,
                cx,
            )
        });

        let result = task.await;
        assert!(
            result.is_err(),
            "expected command to be blocked by default_mode: Deny"
        );
        assert!(
            result.unwrap_err().to_string().contains("disabled"),
            "error should mention the tool is disabled"
        );
    }
}