1use super::*;
2use acp_thread::{AgentConnection, AgentModelGroupName, AgentModelList, UserMessageId};
3use agent_client_protocol::{self as acp};
4use agent_settings::AgentProfileId;
5use anyhow::Result;
6use client::{Client, UserStore};
7use cloud_llm_client::CompletionIntent;
8use collections::IndexMap;
9use context_server::{ContextServer, ContextServerCommand, ContextServerId};
10use fs::{FakeFs, Fs};
11use futures::{
12 FutureExt as _, StreamExt,
13 channel::{
14 mpsc::{self, UnboundedReceiver},
15 oneshot,
16 },
17 future::{Fuse, Shared},
18};
19use gpui::{
20 App, AppContext, AsyncApp, Entity, Task, TestAppContext, UpdateGlobal,
21 http_client::FakeHttpClient,
22};
23use indoc::indoc;
24use language_model::{
25 LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent, LanguageModelId,
26 LanguageModelProviderName, LanguageModelRegistry, LanguageModelRequest,
27 LanguageModelRequestMessage, LanguageModelToolResult, LanguageModelToolSchemaFormat,
28 LanguageModelToolUse, MessageContent, Role, StopReason, fake_provider::FakeLanguageModel,
29};
30use pretty_assertions::assert_eq;
31use project::{
32 Project, context_server_store::ContextServerStore, project_settings::ProjectSettings,
33};
34use prompt_store::ProjectContext;
35use reqwest_client::ReqwestClient;
36use schemars::JsonSchema;
37use serde::{Deserialize, Serialize};
38use serde_json::json;
39use settings::{Settings, SettingsStore};
40use std::{
41 path::Path,
42 pin::Pin,
43 rc::Rc,
44 sync::{
45 Arc,
46 atomic::{AtomicBool, Ordering},
47 },
48 time::Duration,
49};
50use util::path;
51
52mod test_tools;
53use test_tools::*;
54
55fn init_test(cx: &mut TestAppContext) {
56 cx.update(|cx| {
57 let settings_store = SettingsStore::test(cx);
58 cx.set_global(settings_store);
59 });
60}
61
/// Test double for a terminal spawned by the terminal tool. It never exits on
/// its own: `wait_for_exit` resolves only after `kill`/`signal_exit` fires the
/// oneshot sender stored in `exit_sender`.
struct FakeTerminalHandle {
    // Set to true when kill() is called.
    killed: Arc<AtomicBool>,
    // Backing flag for was_stopped_by_user().
    stopped_by_user: Arc<AtomicBool>,
    // Fired at most once to unblock `wait_for_exit`; RefCell because
    // signal_exit() only has `&self`.
    exit_sender: std::cell::RefCell<Option<futures::channel::oneshot::Sender<()>>>,
    // Shared task that resolves once the exit signal has been sent.
    wait_for_exit: Shared<Task<acp::TerminalExitStatus>>,
    // Canned response returned by current_output().
    output: acp::TerminalOutputResponse,
    // Stable id returned by id().
    id: acp::TerminalId,
}
70
71impl FakeTerminalHandle {
72 fn new_never_exits(cx: &mut App) -> Self {
73 let killed = Arc::new(AtomicBool::new(false));
74 let stopped_by_user = Arc::new(AtomicBool::new(false));
75
76 let (exit_sender, exit_receiver) = futures::channel::oneshot::channel();
77
78 let wait_for_exit = cx
79 .spawn(async move |_cx| {
80 // Wait for the exit signal (sent when kill() is called)
81 let _ = exit_receiver.await;
82 acp::TerminalExitStatus::new()
83 })
84 .shared();
85
86 Self {
87 killed,
88 stopped_by_user,
89 exit_sender: std::cell::RefCell::new(Some(exit_sender)),
90 wait_for_exit,
91 output: acp::TerminalOutputResponse::new("partial output".to_string(), false),
92 id: acp::TerminalId::new("fake_terminal".to_string()),
93 }
94 }
95
96 fn was_killed(&self) -> bool {
97 self.killed.load(Ordering::SeqCst)
98 }
99
100 fn set_stopped_by_user(&self, stopped: bool) {
101 self.stopped_by_user.store(stopped, Ordering::SeqCst);
102 }
103
104 fn signal_exit(&self) {
105 if let Some(sender) = self.exit_sender.borrow_mut().take() {
106 let _ = sender.send(());
107 }
108 }
109}
110
111impl crate::TerminalHandle for FakeTerminalHandle {
112 fn id(&self, _cx: &AsyncApp) -> Result<acp::TerminalId> {
113 Ok(self.id.clone())
114 }
115
116 fn current_output(&self, _cx: &AsyncApp) -> Result<acp::TerminalOutputResponse> {
117 Ok(self.output.clone())
118 }
119
120 fn wait_for_exit(&self, _cx: &AsyncApp) -> Result<Shared<Task<acp::TerminalExitStatus>>> {
121 Ok(self.wait_for_exit.clone())
122 }
123
124 fn kill(&self, _cx: &AsyncApp) -> Result<()> {
125 self.killed.store(true, Ordering::SeqCst);
126 self.signal_exit();
127 Ok(())
128 }
129
130 fn was_stopped_by_user(&self, _cx: &AsyncApp) -> Result<bool> {
131 Ok(self.stopped_by_user.load(Ordering::SeqCst))
132 }
133}
134
/// Environment that always returns the same pre-built terminal handle, so a
/// test can inspect kill/output behavior on that single terminal afterwards.
struct FakeThreadEnvironment {
    handle: Rc<FakeTerminalHandle>,
}
138
139impl crate::ThreadEnvironment for FakeThreadEnvironment {
140 fn create_terminal(
141 &self,
142 _command: String,
143 _cwd: Option<std::path::PathBuf>,
144 _output_byte_limit: Option<u64>,
145 _cx: &mut AsyncApp,
146 ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
147 Task::ready(Ok(self.handle.clone() as Rc<dyn crate::TerminalHandle>))
148 }
149}
150
/// Environment that creates multiple independent terminal handles for testing concurrent terminals.
struct MultiTerminalEnvironment {
    // Every handle handed out by create_terminal, in creation order.
    handles: std::cell::RefCell<Vec<Rc<FakeTerminalHandle>>>,
}
155
156impl MultiTerminalEnvironment {
157 fn new() -> Self {
158 Self {
159 handles: std::cell::RefCell::new(Vec::new()),
160 }
161 }
162
163 fn handles(&self) -> Vec<Rc<FakeTerminalHandle>> {
164 self.handles.borrow().clone()
165 }
166}
167
168impl crate::ThreadEnvironment for MultiTerminalEnvironment {
169 fn create_terminal(
170 &self,
171 _command: String,
172 _cwd: Option<std::path::PathBuf>,
173 _output_byte_limit: Option<u64>,
174 cx: &mut AsyncApp,
175 ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
176 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
177 self.handles.borrow_mut().push(handle.clone());
178 Task::ready(Ok(handle as Rc<dyn crate::TerminalHandle>))
179 }
180}
181
182fn always_allow_tools(cx: &mut TestAppContext) {
183 cx.update(|cx| {
184 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
185 settings.always_allow_tool_actions = true;
186 agent_settings::AgentSettings::override_global(settings, cx);
187 });
188}
189
#[gpui::test]
async fn test_echo(cx: &mut TestAppContext) {
    // Smoke test: a plain user message produces a plain assistant reply and
    // the turn stops with EndTurn.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Testing: Reply with 'Hello'"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    // Stream one text chunk, then terminate the completion stream.
    fake_model.send_last_completion_stream_text_chunk("Hello");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    let events = events.collect().await;
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message().unwrap().to_markdown(),
            indoc! {"
                ## Assistant

                Hello
            "}
        )
    });
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
}
219
#[gpui::test]
async fn test_terminal_tool_timeout_kills_handle(cx: &mut TestAppContext) {
    // When the terminal tool is given a timeout_ms and the command outlives
    // it, the tool must kill the terminal handle and still surface the
    // partial output in its result.
    init_test(cx);
    always_allow_tools(cx);

    let fs = FakeFs::new(cx.executor());
    let project = Project::test(fs, [], cx).await;

    // A handle that never exits on its own, so only the timeout can end it.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    #[allow(clippy::arc_with_non_send_sync)]
    let tool = Arc::new(crate::TerminalTool::new(project, environment));
    let (event_stream, mut rx) = crate::ToolCallEventStream::test();

    let task = cx.update(|cx| {
        tool.run(
            crate::TerminalToolInput {
                command: "sleep 1000".to_string(),
                cd: ".".to_string(),
                timeout_ms: Some(5),
            },
            event_stream,
            cx,
        )
    });

    // Before completing, the tool should surface the terminal in a tool-call
    // content update.
    let update = rx.expect_update_fields().await;
    assert!(
        update.content.iter().any(|blocks| {
            blocks
                .iter()
                .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
        }),
        "expected tool call update to include terminal content"
    );

    let mut task_future: Pin<Box<Fuse<Task<Result<String>>>>> = Box::pin(task.fuse());

    // Poll the tool task cooperatively (run_until_parked + a tiny timer tick
    // to advance the timeout timer) until it finishes, or fail after a
    // real-time deadline so the test cannot hang.
    let deadline = std::time::Instant::now() + Duration::from_millis(500);
    loop {
        if let Some(result) = task_future.as_mut().now_or_never() {
            let result = result.expect("terminal tool task should complete");

            assert!(
                handle.was_killed(),
                "expected terminal handle to be killed on timeout"
            );
            assert!(
                result.contains("partial output"),
                "expected result to include terminal output, got: {result}"
            );
            return;
        }

        if std::time::Instant::now() >= deadline {
            panic!("timed out waiting for terminal tool task to complete");
        }

        cx.run_until_parked();
        cx.background_executor.timer(Duration::from_millis(1)).await;
    }
}
285
#[gpui::test]
#[ignore]
async fn test_terminal_tool_without_timeout_does_not_kill_handle(cx: &mut TestAppContext) {
    // Counterpart to the timeout test: with timeout_ms == None the handle
    // must NOT be killed while the command is still running.
    // NOTE(review): relies on a real-time smol::Timer sleep, which is
    // presumably why this test is #[ignore]d by default.
    init_test(cx);
    always_allow_tools(cx);

    let fs = FakeFs::new(cx.executor());
    let project = Project::test(fs, [], cx).await;

    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    #[allow(clippy::arc_with_non_send_sync)]
    let tool = Arc::new(crate::TerminalTool::new(project, environment));
    let (event_stream, mut rx) = crate::ToolCallEventStream::test();

    // Keep the task alive (but unawaited) so the command keeps "running".
    let _task = cx.update(|cx| {
        tool.run(
            crate::TerminalToolInput {
                command: "sleep 1000".to_string(),
                cd: ".".to_string(),
                timeout_ms: None,
            },
            event_stream,
            cx,
        )
    });

    let update = rx.expect_update_fields().await;
    assert!(
        update.content.iter().any(|blocks| {
            blocks
                .iter()
                .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
        }),
        "expected tool call update to include terminal content"
    );

    // Give the tool a window in which a (buggy) timeout could have fired.
    smol::Timer::after(Duration::from_millis(25)).await;

    assert!(
        !handle.was_killed(),
        "did not expect terminal handle to be killed without a timeout"
    );
}
333
#[gpui::test]
async fn test_thinking(cx: &mut TestAppContext) {
    // A Thinking event followed by text should render as a <think> block
    // preceding the answer in the message markdown.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.send(
                UserMessageId::new(),
                [indoc! {"
                    Testing:

                    Generate a thinking step where you just think the word 'Think',
                    and have your final answer be 'Hello'
                "}],
                cx,
            )
        })
        .unwrap();
    cx.run_until_parked();
    // Stream a thinking chunk, then the visible answer, then end the turn.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Thinking {
        text: "Think".to_string(),
        signature: None,
    });
    fake_model.send_last_completion_stream_text_chunk("Hello");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    let events = events.collect().await;
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message().unwrap().to_markdown(),
            indoc! {"
                ## Assistant

                <think>Think</think>
                Hello
            "}
        )
    });
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
}
377
#[gpui::test]
async fn test_system_prompt(cx: &mut TestAppContext) {
    // With a tool registered, the system prompt must include project context
    // (the shell) and the tool-dependent "Fixing Diagnostics" section.
    let ThreadTest {
        model,
        thread,
        project_context,
        ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    project_context.update(cx, |project_context, _cx| {
        project_context.shell = "test-shell".into()
    });
    thread.update(cx, |thread, _| thread.add_tool(EchoTool));
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let mut pending_completions = fake_model.pending_completions();
    assert_eq!(
        pending_completions.len(),
        1,
        "unexpected pending completions: {:?}",
        pending_completions
    );

    let pending_completion = pending_completions.pop().unwrap();
    // The system prompt is always the first message of the request.
    assert_eq!(pending_completion.messages[0].role, Role::System);

    let system_message = &pending_completion.messages[0];
    let system_prompt = system_message.content[0].to_str().unwrap();
    assert!(
        system_prompt.contains("test-shell"),
        "unexpected system message: {:?}",
        system_message
    );
    // Emitted only when at least one tool is available (cf. the
    // without-tools test below asserting its absence).
    assert!(
        system_prompt.contains("## Fixing Diagnostics"),
        "unexpected system message: {:?}",
        system_message
    );
}
422
#[gpui::test]
async fn test_system_prompt_without_tools(cx: &mut TestAppContext) {
    // With no tools registered, the tool-related sections must be omitted
    // from the system prompt.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let mut pending_completions = fake_model.pending_completions();
    assert_eq!(
        pending_completions.len(),
        1,
        "unexpected pending completions: {:?}",
        pending_completions
    );

    let pending_completion = pending_completions.pop().unwrap();
    assert_eq!(pending_completion.messages[0].role, Role::System);

    let system_message = &pending_completion.messages[0];
    let system_prompt = system_message.content[0].to_str().unwrap();
    assert!(
        !system_prompt.contains("## Tool Use"),
        "unexpected system message: {:?}",
        system_message
    );
    assert!(
        !system_prompt.contains("## Fixing Diagnostics"),
        "unexpected system message: {:?}",
        system_message
    );
}
458
#[gpui::test]
async fn test_prompt_caching(cx: &mut TestAppContext) {
    // Verifies the cache-marking policy: in every request, only the final
    // user-role message (including tool results) carries `cache: true`;
    // all earlier messages are sent with `cache: false`.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Send initial user message and verify it's cached
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // messages[0] is the system prompt; compare everything after it.
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![LanguageModelRequestMessage {
            role: Role::User,
            content: vec!["Message 1".into()],
            cache: true,
            reasoning_details: None,
        }]
    );
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
        "Response to Message 1".into(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Send another user message and verify only the latest is cached
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 2".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
        "Response to Message 2".into(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Simulate a tool call and verify that the latest tool result is cached
    thread.update(cx, |thread, _| thread.add_tool(EchoTool));
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Use the echo tool"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use = LanguageModelToolUse {
        id: "tool_1".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // The follow-up request ends with the tool result, which should now be
    // the only message marked cacheable.
    let completion = fake_model.pending_completions().pop().unwrap();
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "test".into(),
        output: Some("test".into()),
    };
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 2".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 2".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Use the echo tool".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
}
604
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_basic_tool_calls(cx: &mut TestAppContext) {
    // End-to-end test against a real model (only runs with the "e2e"
    // feature): exercises tool calls completing both before and after the
    // model's stream stops.
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // Test a tool call that's likely to complete *before* streaming stops.
    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(
                UserMessageId::new(),
                ["Now test the echo tool with 'Hello'. Does it work? Say 'Yes' or 'No'."],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);

    // Test a tool call that's likely to complete *after* streaming stops.
    let events = thread
        .update(cx, |thread, cx| {
            thread.remove_tool(&EchoTool::name());
            thread.add_tool(DelayTool);
            thread.send(
                UserMessageId::new(),
                [
                    "Now call the delay tool with 200ms.",
                    "When the timer goes off, then you echo the output of the tool.",
                ],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
    // The model should have echoed the delay tool's "Ding" output.
    thread.update(cx, |thread, _cx| {
        assert!(
            thread
                .last_message()
                .unwrap()
                .as_agent_message()
                .unwrap()
                .content
                .iter()
                .any(|content| {
                    if let AgentMessageContent::Text(text) = content {
                        text.contains("Ding")
                    } else {
                        false
                    }
                }),
            "{}",
            thread.to_markdown()
        );
    });
}
664
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_streaming_tool_calls(cx: &mut TestAppContext) {
    // End-to-end test (e2e feature only): while tool-use input streams in,
    // the thread should expose partially-complete tool input before the
    // final, complete input arrives.
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // word_list streams a multi-key input object, so partial input states
    // are observable while the call is still Pending.
    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(WordListTool);
            thread.send(UserMessageId::new(), ["Test the word_list tool."], cx)
        })
        .unwrap();

    let mut saw_partial_tool_use = false;
    while let Some(event) = events.next().await {
        if let Ok(ThreadEvent::ToolCall(tool_call)) = event {
            thread.update(cx, |thread, _cx| {
                // Look for a tool use in the thread's last message
                let message = thread.last_message().unwrap();
                let agent_message = message.as_agent_message().unwrap();
                let last_content = agent_message.content.last().unwrap();
                if let AgentMessageContent::ToolUse(last_tool_use) = last_content {
                    assert_eq!(last_tool_use.name.as_ref(), "word_list");
                    if tool_call.status == acp::ToolCallStatus::Pending {
                        // Pending + missing final key "g" => input still
                        // streaming; this is the partial state we're after.
                        if !last_tool_use.is_input_complete
                            && last_tool_use.input.get("g").is_none()
                        {
                            saw_partial_tool_use = true;
                        }
                    } else {
                        last_tool_use
                            .input
                            .get("a")
                            .expect("'a' has streamed because input is now complete");
                        last_tool_use
                            .input
                            .get("g")
                            .expect("'g' has streamed because input is now complete");
                    }
                } else {
                    panic!("last content should be a tool use");
                }
            });
        }
    }

    assert!(
        saw_partial_tool_use,
        "should see at least one partially streamed tool use in the history"
    );
}
716
717#[gpui::test]
718async fn test_tool_authorization(cx: &mut TestAppContext) {
719 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
720 let fake_model = model.as_fake();
721
722 let mut events = thread
723 .update(cx, |thread, cx| {
724 thread.add_tool(ToolRequiringPermission);
725 thread.send(UserMessageId::new(), ["abc"], cx)
726 })
727 .unwrap();
728 cx.run_until_parked();
729 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
730 LanguageModelToolUse {
731 id: "tool_id_1".into(),
732 name: ToolRequiringPermission::name().into(),
733 raw_input: "{}".into(),
734 input: json!({}),
735 is_input_complete: true,
736 thought_signature: None,
737 },
738 ));
739 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
740 LanguageModelToolUse {
741 id: "tool_id_2".into(),
742 name: ToolRequiringPermission::name().into(),
743 raw_input: "{}".into(),
744 input: json!({}),
745 is_input_complete: true,
746 thought_signature: None,
747 },
748 ));
749 fake_model.end_last_completion_stream();
750 let tool_call_auth_1 = next_tool_call_authorization(&mut events).await;
751 let tool_call_auth_2 = next_tool_call_authorization(&mut events).await;
752
753 // Approve the first
754 tool_call_auth_1
755 .response
756 .send(tool_call_auth_1.options[1].option_id.clone())
757 .unwrap();
758 cx.run_until_parked();
759
760 // Reject the second
761 tool_call_auth_2
762 .response
763 .send(tool_call_auth_1.options[2].option_id.clone())
764 .unwrap();
765 cx.run_until_parked();
766
767 let completion = fake_model.pending_completions().pop().unwrap();
768 let message = completion.messages.last().unwrap();
769 assert_eq!(
770 message.content,
771 vec![
772 language_model::MessageContent::ToolResult(LanguageModelToolResult {
773 tool_use_id: tool_call_auth_1.tool_call.tool_call_id.0.to_string().into(),
774 tool_name: ToolRequiringPermission::name().into(),
775 is_error: false,
776 content: "Allowed".into(),
777 output: Some("Allowed".into())
778 }),
779 language_model::MessageContent::ToolResult(LanguageModelToolResult {
780 tool_use_id: tool_call_auth_2.tool_call.tool_call_id.0.to_string().into(),
781 tool_name: ToolRequiringPermission::name().into(),
782 is_error: true,
783 content: "Permission to run tool denied by user".into(),
784 output: Some("Permission to run tool denied by user".into())
785 })
786 ]
787 );
788
789 // Simulate yet another tool call.
790 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
791 LanguageModelToolUse {
792 id: "tool_id_3".into(),
793 name: ToolRequiringPermission::name().into(),
794 raw_input: "{}".into(),
795 input: json!({}),
796 is_input_complete: true,
797 thought_signature: None,
798 },
799 ));
800 fake_model.end_last_completion_stream();
801
802 // Respond by always allowing tools.
803 let tool_call_auth_3 = next_tool_call_authorization(&mut events).await;
804 tool_call_auth_3
805 .response
806 .send(tool_call_auth_3.options[0].option_id.clone())
807 .unwrap();
808 cx.run_until_parked();
809 let completion = fake_model.pending_completions().pop().unwrap();
810 let message = completion.messages.last().unwrap();
811 assert_eq!(
812 message.content,
813 vec![language_model::MessageContent::ToolResult(
814 LanguageModelToolResult {
815 tool_use_id: tool_call_auth_3.tool_call.tool_call_id.0.to_string().into(),
816 tool_name: ToolRequiringPermission::name().into(),
817 is_error: false,
818 content: "Allowed".into(),
819 output: Some("Allowed".into())
820 }
821 )]
822 );
823
824 // Simulate a final tool call, ensuring we don't trigger authorization.
825 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
826 LanguageModelToolUse {
827 id: "tool_id_4".into(),
828 name: ToolRequiringPermission::name().into(),
829 raw_input: "{}".into(),
830 input: json!({}),
831 is_input_complete: true,
832 thought_signature: None,
833 },
834 ));
835 fake_model.end_last_completion_stream();
836 cx.run_until_parked();
837 let completion = fake_model.pending_completions().pop().unwrap();
838 let message = completion.messages.last().unwrap();
839 assert_eq!(
840 message.content,
841 vec![language_model::MessageContent::ToolResult(
842 LanguageModelToolResult {
843 tool_use_id: "tool_id_4".into(),
844 tool_name: ToolRequiringPermission::name().into(),
845 is_error: false,
846 content: "Allowed".into(),
847 output: Some("Allowed".into())
848 }
849 )]
850 );
851}
852
#[gpui::test]
async fn test_tool_hallucination(cx: &mut TestAppContext) {
    // If the model invents a tool that was never registered, the thread
    // should still surface a tool call and then mark it Failed.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_id_1".into(),
            name: "nonexistent_tool".into(),
            raw_input: "{}".into(),
            input: json!({}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // The call is reported first (Pending), then updated to Failed.
    let tool_call = expect_tool_call(&mut events).await;
    assert_eq!(tool_call.title, "nonexistent_tool");
    assert_eq!(tool_call.status, acp::ToolCallStatus::Pending);
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(update.fields.status, Some(acp::ToolCallStatus::Failed));
}
882
#[gpui::test]
async fn test_resume_after_tool_use_limit(cx: &mut TestAppContext) {
    // After the model reports ToolUseLimitReached, the turn errors out;
    // resume() must replay the conversation plus a "Continue where you left
    // off" user message (which becomes the new cache point).
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: EchoTool::name().into(),
        raw_input: "{}".into(),
        input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.end_last_completion_stream();

    cx.run_until_parked();
    // The tool ran; its result should be the trailing (cached) message.
    let completion = fake_model.pending_completions().pop().unwrap();
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_id_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "def".into(),
        output: Some("def".into()),
    };
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use.clone())],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result.clone())],
                cache: true,
                reasoning_details: None,
            },
        ]
    );

    // Simulate reaching tool use limit.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
    fake_model.end_last_completion_stream();
    let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
    assert!(
        last_event
            .unwrap_err()
            .is::<language_model::ToolUseLimitReachedError>()
    );

    // resume() should re-request with a synthesized continuation message.
    let events = thread.update(cx, |thread, cx| thread.resume(cx)).unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Continue where you left off".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );

    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text("Done".into()));
    fake_model.end_last_completion_stream();
    events.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message().unwrap().to_markdown(),
            indoc! {"
                ## Assistant

                Done
            "}
        )
    });
}
997
#[gpui::test]
async fn test_send_after_tool_use_limit(cx: &mut TestAppContext) {
    // After ToolUseLimitReached, sending a brand-new user message (instead of
    // resuming) must still include the prior tool use/result in the request,
    // with the new message as the trailing cached one.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: EchoTool::name().into(),
        raw_input: "{}".into(),
        input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
        is_input_complete: true,
        thought_signature: None,
    };
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_id_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "def".into(),
        output: Some("def".into()),
    };
    // Tool use and the limit event arrive in the same stream: the turn ends
    // with a ToolUseLimitReachedError.
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
    fake_model.end_last_completion_stream();
    let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
    assert!(
        last_event
            .unwrap_err()
            .is::<language_model::ToolUseLimitReachedError>()
    );

    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), vec!["ghi"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["ghi".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
}
1074
1075async fn expect_tool_call(events: &mut UnboundedReceiver<Result<ThreadEvent>>) -> acp::ToolCall {
1076 let event = events
1077 .next()
1078 .await
1079 .expect("no tool call authorization event received")
1080 .unwrap();
1081 match event {
1082 ThreadEvent::ToolCall(tool_call) => tool_call,
1083 event => {
1084 panic!("Unexpected event {event:?}");
1085 }
1086 }
1087}
1088
1089async fn expect_tool_call_update_fields(
1090 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1091) -> acp::ToolCallUpdate {
1092 let event = events
1093 .next()
1094 .await
1095 .expect("no tool call authorization event received")
1096 .unwrap();
1097 match event {
1098 ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(update)) => update,
1099 event => {
1100 panic!("Unexpected event {event:?}");
1101 }
1102 }
1103}
1104
1105async fn next_tool_call_authorization(
1106 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1107) -> ToolCallAuthorization {
1108 loop {
1109 let event = events
1110 .next()
1111 .await
1112 .expect("no tool call authorization event received")
1113 .unwrap();
1114 if let ThreadEvent::ToolCallAuthorization(tool_call_authorization) = event {
1115 let permission_kinds = tool_call_authorization
1116 .options
1117 .iter()
1118 .map(|o| o.kind)
1119 .collect::<Vec<_>>();
1120 assert_eq!(
1121 permission_kinds,
1122 vec![
1123 acp::PermissionOptionKind::AllowAlways,
1124 acp::PermissionOptionKind::AllowOnce,
1125 acp::PermissionOptionKind::RejectOnce,
1126 ]
1127 );
1128 return tool_call_authorization;
1129 }
1130 }
1131}
1132
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_concurrent_tool_calls(cx: &mut TestAppContext) {
    // e2e test against a real Sonnet 4 model; only runs with the "e2e" feature.
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // Test concurrent tool calls with different delay times
    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(DelayTool);
            thread.send(
                UserMessageId::new(),
                [
                    "Call the delay tool twice in the same message.",
                    "Once with 100ms. Once with 300ms.",
                    "When both timers are complete, describe the outputs.",
                ],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;

    // The turn should complete normally even though two tool calls overlapped.
    let stop_reasons = stop_events(events);
    assert_eq!(stop_reasons, vec![acp::StopReason::EndTurn]);

    thread.update(cx, |thread, _cx| {
        let last_message = thread.last_message().unwrap();
        let agent_message = last_message.as_agent_message().unwrap();
        // Concatenate all text chunks of the final agent message.
        let text = agent_message
            .content
            .iter()
            .filter_map(|content| {
                if let AgentMessageContent::Text(text) = content {
                    Some(text.as_str())
                } else {
                    None
                }
            })
            .collect::<String>();

        // "Ding" presumably comes from DelayTool's output (defined in
        // test_tools) — the model is asked to describe both tool outputs.
        assert!(text.contains("Ding"));
    });
}
1177
#[gpui::test]
async fn test_profiles(cx: &mut TestAppContext) {
    // Verifies that the active agent profile controls which tools are offered
    // to the model on each completion request.
    let ThreadTest {
        model, thread, fs, ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Register three native tools; the profiles below will filter them.
    thread.update(cx, |thread, _cx| {
        thread.add_tool(DelayTool);
        thread.add_tool(EchoTool);
        thread.add_tool(InfiniteTool);
    });

    // Override profiles and wait for settings to be loaded.
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "profiles": {
                    "test-1": {
                        "name": "Test Profile 1",
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                        }
                    },
                    "test-2": {
                        "name": "Test Profile 2",
                        "tools": {
                            InfiniteTool::name(): true,
                        }
                    }
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;
    cx.run_until_parked();

    // Test that test-1 profile (default) has echo and delay tools
    thread
        .update(cx, |thread, cx| {
            thread.set_profile(AgentProfileId("test-1".into()), cx);
            thread.send(UserMessageId::new(), ["test"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let mut pending_completions = fake_model.pending_completions();
    assert_eq!(pending_completions.len(), 1);
    let completion = pending_completions.pop().unwrap();
    // Both asserts below rely on the tool list being sorted by name.
    let tool_names: Vec<String> = completion
        .tools
        .iter()
        .map(|tool| tool.name.clone())
        .collect();
    assert_eq!(tool_names, vec![DelayTool::name(), EchoTool::name()]);
    fake_model.end_last_completion_stream();

    // Switch to test-2 profile, and verify that it has only the infinite tool.
    thread
        .update(cx, |thread, cx| {
            thread.set_profile(AgentProfileId("test-2".into()), cx);
            thread.send(UserMessageId::new(), ["test2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let mut pending_completions = fake_model.pending_completions();
    assert_eq!(pending_completions.len(), 1);
    let completion = pending_completions.pop().unwrap();
    let tool_names: Vec<String> = completion
        .tools
        .iter()
        .map(|tool| tool.name.clone())
        .collect();
    assert_eq!(tool_names, vec![InfiniteTool::name()]);
}
1257
#[gpui::test]
async fn test_mcp_tools(cx: &mut TestAppContext) {
    // Verifies that tools provided by an MCP context server are exposed to
    // the model, that calls are routed to the server, and that a name
    // collision with a native tool is resolved by prefixing the server name.
    let ThreadTest {
        model,
        thread,
        context_server_store,
        fs,
        ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Override profiles and wait for settings to be loaded.
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "always_allow_tool_actions": true,
                "profiles": {
                    "test": {
                        "name": "Test Profile",
                        "enable_all_context_servers": true,
                        "tools": {
                            EchoTool::name(): true,
                        }
                    },
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;
    cx.run_until_parked();
    thread.update(cx, |thread, cx| {
        thread.set_profile(AgentProfileId("test".into()), cx)
    });

    // Start a context server exposing a single "echo" tool.
    let mut mcp_tool_calls = setup_context_server(
        "test_server",
        vec![context_server::types::Tool {
            name: "echo".into(),
            description: None,
            input_schema: serde_json::to_value(EchoTool::input_schema(
                LanguageModelToolSchemaFormat::JsonSchema,
            ))
            .unwrap(),
            output_schema: None,
            annotations: None,
        }],
        &context_server_store,
        cx,
    );

    let events = thread.update(cx, |thread, cx| {
        thread.send(UserMessageId::new(), ["Hey"], cx).unwrap()
    });
    cx.run_until_parked();

    // Simulate the model calling the MCP tool.
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_1".into(),
            name: "echo".into(),
            raw_input: json!({"text": "test"}).to_string(),
            input: json!({"text": "test"}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // The MCP server should receive the call; respond so the turn continues.
    let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
    assert_eq!(tool_call_params.name, "echo");
    assert_eq!(tool_call_params.arguments, Some(json!({"text": "test"})));
    tool_call_response
        .send(context_server::types::CallToolResponse {
            content: vec![context_server::types::ToolResponseContent::Text {
                text: "test".into(),
            }],
            is_error: None,
            meta: None,
            structured_content: None,
        })
        .unwrap();
    cx.run_until_parked();

    // NOTE(review): this re-asserts the `completion` captured before the tool
    // call rather than popping the follow-up pending completion; presumably it
    // was meant to inspect the new completion — confirm intent.
    assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
    fake_model.send_last_completion_stream_text_chunk("Done!");
    fake_model.end_last_completion_stream();
    events.collect::<Vec<_>>().await;

    // Send again after adding the echo tool, ensuring the name collision is resolved.
    let events = thread.update(cx, |thread, cx| {
        thread.add_tool(EchoTool);
        thread.send(UserMessageId::new(), ["Go"], cx).unwrap()
    });
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        tool_names_for_completion(&completion),
        vec!["echo", "test_server_echo"]
    );
    // Model calls both the MCP echo (under its prefixed name) and the native echo.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_2".into(),
            name: "test_server_echo".into(),
            raw_input: json!({"text": "mcp"}).to_string(),
            input: json!({"text": "mcp"}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_3".into(),
            name: "echo".into(),
            raw_input: json!({"text": "native"}).to_string(),
            input: json!({"text": "native"}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // The server sees the call under its original, unprefixed name.
    let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
    assert_eq!(tool_call_params.name, "echo");
    assert_eq!(tool_call_params.arguments, Some(json!({"text": "mcp"})));
    tool_call_response
        .send(context_server::types::CallToolResponse {
            content: vec![context_server::types::ToolResponseContent::Text { text: "mcp".into() }],
            is_error: None,
            meta: None,
            structured_content: None,
        })
        .unwrap();
    cx.run_until_parked();

    // Ensure the tool results were inserted with the correct names.
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages.last().unwrap().content,
        vec![
            MessageContent::ToolResult(LanguageModelToolResult {
                tool_use_id: "tool_3".into(),
                tool_name: "echo".into(),
                is_error: false,
                content: "native".into(),
                output: Some("native".into()),
            },),
            MessageContent::ToolResult(LanguageModelToolResult {
                tool_use_id: "tool_2".into(),
                tool_name: "test_server_echo".into(),
                is_error: false,
                content: "mcp".into(),
                output: Some("mcp".into()),
            },),
        ]
    );
    fake_model.end_last_completion_stream();
    events.collect::<Vec<_>>().await;
}
1423
#[gpui::test]
async fn test_mcp_tool_truncation(cx: &mut TestAppContext) {
    // Verifies how colliding and over-long MCP tool names are disambiguated:
    // per the final assertion, colliding names get a server prefix (shortened
    // to a single letter when the full prefix would not fit) and names longer
    // than MAX_TOOL_NAME_LENGTH are truncated.
    let ThreadTest {
        model,
        thread,
        context_server_store,
        fs,
        ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Set up a profile with all tools enabled
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "profiles": {
                    "test": {
                        "name": "Test Profile",
                        "enable_all_context_servers": true,
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                            WordListTool::name(): true,
                            ToolRequiringPermission::name(): true,
                            InfiniteTool::name(): true,
                        }
                    },
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;
    cx.run_until_parked();

    thread.update(cx, |thread, cx| {
        thread.set_profile(AgentProfileId("test".into()), cx);
        thread.add_tool(EchoTool);
        thread.add_tool(DelayTool);
        thread.add_tool(WordListTool);
        thread.add_tool(ToolRequiringPermission);
        thread.add_tool(InfiniteTool);
    });

    // Set up multiple context servers with some overlapping tool names
    let _server1_calls = setup_context_server(
        "xxx",
        vec![
            context_server::types::Tool {
                name: "echo".into(), // Conflicts with native EchoTool
                description: None,
                input_schema: serde_json::to_value(EchoTool::input_schema(
                    LanguageModelToolSchemaFormat::JsonSchema,
                ))
                .unwrap(),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "unique_tool_1".into(),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );

    let _server2_calls = setup_context_server(
        "yyy",
        vec![
            context_server::types::Tool {
                name: "echo".into(), // Also conflicts with native EchoTool
                description: None,
                input_schema: serde_json::to_value(EchoTool::input_schema(
                    LanguageModelToolSchemaFormat::JsonSchema,
                ))
                .unwrap(),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "unique_tool_2".into(),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            // Long names that collide across servers "yyy" and "zzz": a full
            // server prefix would exceed MAX_TOOL_NAME_LENGTH.
            context_server::types::Tool {
                name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );
    let _server3_calls = setup_context_server(
        "zzz",
        vec![
            context_server::types::Tool {
                name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            // Exceeds MAX_TOOL_NAME_LENGTH outright; expected to be truncated.
            context_server::types::Tool {
                name: "c".repeat(MAX_TOOL_NAME_LENGTH + 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );

    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Go"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    // Sorted final names: "b…" kept as-is (unclear from here how the yyy/zzz
    // collision on "b…" resolves to one entry — TODO confirm), "c…" truncated,
    // colliding "echo"s prefixed with their server names, and the long "a…"
    // names prefixed with a single letter of the server name.
    assert_eq!(
        tool_names_for_completion(&completion),
        vec![
            "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
            "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
            "delay",
            "echo",
            "infinite",
            "tool_requiring_permission",
            "unique_tool_1",
            "unique_tool_2",
            "word_list",
            "xxx_echo",
            "y_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
            "yyy_echo",
            "z_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        ]
    );
}
1589
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_cancellation(cx: &mut TestAppContext) {
    // e2e test: cancelling a turn mid-tool-call closes the event stream with
    // a Cancelled stop reason, and the thread remains usable afterwards.
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(InfiniteTool);
            thread.add_tool(EchoTool);
            thread.send(
                UserMessageId::new(),
                ["Call the echo tool, then call the infinite tool, then explain their output"],
                cx,
            )
        })
        .unwrap();

    // Wait until both tools are called.
    let mut expected_tools = vec!["Echo", "Infinite Tool"];
    let mut echo_id = None;
    let mut echo_completed = false;
    while let Some(event) = events.next().await {
        match event.unwrap() {
            ThreadEvent::ToolCall(tool_call) => {
                // Tools must be called in the order the prompt requested.
                assert_eq!(tool_call.title, expected_tools.remove(0));
                if tool_call.title == "Echo" {
                    echo_id = Some(tool_call.tool_call_id);
                }
            }
            // Track when the echo tool (specifically) reports completion.
            ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
                acp::ToolCallUpdate {
                    tool_call_id,
                    fields:
                        acp::ToolCallUpdateFields {
                            status: Some(acp::ToolCallStatus::Completed),
                            ..
                        },
                    ..
                },
            )) if Some(&tool_call_id) == echo_id.as_ref() => {
                echo_completed = true;
            }
            _ => {}
        }

        if expected_tools.is_empty() && echo_completed {
            break;
        }
    }

    // Cancel the current send and ensure that the event stream is closed, even
    // if one of the tools is still running.
    thread.update(cx, |thread, cx| thread.cancel(cx)).await;
    let events = events.collect::<Vec<_>>().await;
    let last_event = events.last();
    assert!(
        matches!(
            last_event,
            Some(Ok(ThreadEvent::Stop(acp::StopReason::Cancelled)))
        ),
        "unexpected event {last_event:?}"
    );

    // Ensure we can still send a new message after cancellation.
    let events = thread
        .update(cx, |thread, cx| {
            thread.send(
                UserMessageId::new(),
                ["Testing: reply with 'Hello' then stop."],
                cx,
            )
        })
        .unwrap()
        .collect::<Vec<_>>()
        .await;
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();
        assert_eq!(
            agent_message.content,
            vec![AgentMessageContent::Text("Hello".to_string())]
        );
    });
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
}
1675
#[gpui::test]
async fn test_terminal_tool_cancellation_captures_output(cx: &mut TestAppContext) {
    // Verifies that cancelling a turn while a terminal tool is running kills
    // the terminal and records its partial output in the tool result, rather
    // than a bare "canceled" message.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A fake terminal whose command never exits on its own.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Cancel the thread while the terminal is running
    thread.update(cx, |thread, cx| thread.cancel(cx)).detach();

    // Collect remaining events, driving the executor to let cancellation complete
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on cancellation"
    );

    // Verify we got a cancellation stop event
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::Cancelled],
    );

    // Verify the tool result contains the terminal output, not just "Tool canceled by user"
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        // "partial output" comes from FakeTerminalHandle's output field
        assert!(
            result_text.contains("partial output"),
            "expected tool result to contain terminal output, got: {result_text}"
        );
        // Match the actual format from process_content in terminal_tool.rs
        assert!(
            result_text.contains("The user stopped this command"),
            "expected tool result to indicate user stopped, got: {result_text}"
        );
    });

    // Verify we can send a new message after cancellation
    verify_thread_recovery(&thread, &fake_model, cx).await;
}
1772
1773/// Helper to verify thread can recover after cancellation by sending a simple message.
1774async fn verify_thread_recovery(
1775 thread: &Entity<Thread>,
1776 fake_model: &FakeLanguageModel,
1777 cx: &mut TestAppContext,
1778) {
1779 let events = thread
1780 .update(cx, |thread, cx| {
1781 thread.send(
1782 UserMessageId::new(),
1783 ["Testing: reply with 'Hello' then stop."],
1784 cx,
1785 )
1786 })
1787 .unwrap();
1788 cx.run_until_parked();
1789 fake_model.send_last_completion_stream_text_chunk("Hello");
1790 fake_model
1791 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
1792 fake_model.end_last_completion_stream();
1793
1794 let events = events.collect::<Vec<_>>().await;
1795 thread.update(cx, |thread, _cx| {
1796 let message = thread.last_message().unwrap();
1797 let agent_message = message.as_agent_message().unwrap();
1798 assert_eq!(
1799 agent_message.content,
1800 vec![AgentMessageContent::Text("Hello".to_string())]
1801 );
1802 });
1803 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
1804}
1805
1806/// Waits for a terminal tool to start by watching for a ToolCallUpdate with terminal content.
1807async fn wait_for_terminal_tool_started(
1808 events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
1809 cx: &mut TestAppContext,
1810) {
1811 let deadline = cx.executor().num_cpus() * 100; // Scale with available parallelism
1812 for _ in 0..deadline {
1813 cx.run_until_parked();
1814
1815 while let Some(Some(event)) = events.next().now_or_never() {
1816 if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
1817 update,
1818 ))) = &event
1819 {
1820 if update.fields.content.as_ref().is_some_and(|content| {
1821 content
1822 .iter()
1823 .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
1824 }) {
1825 return;
1826 }
1827 }
1828 }
1829
1830 cx.background_executor
1831 .timer(Duration::from_millis(10))
1832 .await;
1833 }
1834 panic!("terminal tool did not start within the expected time");
1835}
1836
1837/// Collects events until a Stop event is received, driving the executor to completion.
1838async fn collect_events_until_stop(
1839 events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
1840 cx: &mut TestAppContext,
1841) -> Vec<Result<ThreadEvent>> {
1842 let mut collected = Vec::new();
1843 let deadline = cx.executor().num_cpus() * 200;
1844
1845 for _ in 0..deadline {
1846 cx.executor().advance_clock(Duration::from_millis(10));
1847 cx.run_until_parked();
1848
1849 while let Some(Some(event)) = events.next().now_or_never() {
1850 let is_stop = matches!(&event, Ok(ThreadEvent::Stop(_)));
1851 collected.push(event);
1852 if is_stop {
1853 return collected;
1854 }
1855 }
1856 }
1857 panic!(
1858 "did not receive Stop event within the expected time; collected {} events",
1859 collected.len()
1860 );
1861}
1862
#[gpui::test]
async fn test_truncate_while_terminal_tool_running(cx: &mut TestAppContext) {
    // Verifies that truncating the thread at the triggering message while a
    // terminal tool is running kills the terminal and empties the thread.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A fake terminal whose command never exits on its own.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    // Keep the message id so we can truncate back to this exact message.
    let message_id = UserMessageId::new();
    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(message_id.clone(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Truncate the thread while the terminal is running
    thread
        .update(cx, |thread, cx| thread.truncate(message_id, cx))
        .unwrap();

    // Drive the executor to let cancellation complete
    let _ = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on truncate"
    );

    // Verify the thread is empty after truncation
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            "",
            "expected thread to be empty after truncating the only message"
        );
    });

    // Verify we can send a new message after truncation
    verify_thread_recovery(&thread, &fake_model, cx).await;
}
1929
#[gpui::test]
async fn test_cancel_multiple_concurrent_terminal_tools(cx: &mut TestAppContext) {
    // Tests that cancellation properly kills all running terminal tools when multiple are active.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // Environment that records a handle for each terminal it creates.
    let environment = Rc::new(MultiTerminalEnvironment::new());

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment.clone(),
            ));
            thread.send(UserMessageId::new(), ["run multiple commands"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling two terminal tools
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_2".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 2000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 2000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for both terminal tools to start by counting terminal content updates
    let mut terminals_started = 0;
    let deadline = cx.executor().num_cpus() * 100;
    for _ in 0..deadline {
        cx.run_until_parked();

        // Drain ready events, counting updates that carry terminal content.
        while let Some(Some(event)) = events.next().now_or_never() {
            if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
                update,
            ))) = &event
            {
                if update.fields.content.as_ref().is_some_and(|content| {
                    content
                        .iter()
                        .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
                }) {
                    terminals_started += 1;
                    if terminals_started >= 2 {
                        break;
                    }
                }
            }
        }
        if terminals_started >= 2 {
            break;
        }

        // Nothing yet — yield briefly before polling again.
        cx.background_executor
            .timer(Duration::from_millis(10))
            .await;
    }
    assert!(
        terminals_started >= 2,
        "expected 2 terminal tools to start, got {terminals_started}"
    );

    // Cancel the thread while both terminals are running
    thread.update(cx, |thread, cx| thread.cancel(cx)).detach();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify both terminal handles were killed
    let handles = environment.handles();
    assert_eq!(
        handles.len(),
        2,
        "expected 2 terminal handles to be created"
    );
    assert!(
        handles[0].was_killed(),
        "expected first terminal handle to be killed on cancellation"
    );
    assert!(
        handles[1].was_killed(),
        "expected second terminal handle to be killed on cancellation"
    );

    // Verify we got a cancellation stop event
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::Cancelled],
    );
}
2038
#[gpui::test]
async fn test_terminal_tool_stopped_via_terminal_card_button(cx: &mut TestAppContext) {
    // Tests that clicking the stop button on the terminal card (as opposed to the main
    // cancel button) properly reports user stopped via the was_stopped_by_user path.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A fake terminal whose command never exits on its own.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Simulate user clicking stop on the terminal card itself.
    // This sets the flag and signals exit (simulating what the real UI would do).
    handle.set_stopped_by_user(true);
    handle.killed.store(true, Ordering::SeqCst);
    handle.signal_exit();

    // Wait for the tool to complete
    cx.run_until_parked();

    // The thread continues after tool completion - simulate the model ending its turn
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify we got an EndTurn (not Cancelled, since we didn't cancel the thread)
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::EndTurn],
    );

    // Verify the tool result indicates user stopped
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        assert!(
            result_text.contains("The user stopped this command"),
            "expected tool result to indicate user stopped, got: {result_text}"
        );
    });
}
2133
/// When the terminal tool is invoked with a `timeout_ms` and the command
/// outlives it, the terminal must be killed and the tool result must report a
/// timeout — and must NOT claim the user stopped the command.
#[gpui::test]
async fn test_terminal_tool_timeout_expires(cx: &mut TestAppContext) {
    // Tests that when a timeout is configured and expires, the tool result indicates timeout.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A handle whose underlying process never exits on its own, so only the
    // configured timeout can end the tool call.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command with timeout"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool with a short timeout
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": ".", "timeout_ms": 100}"#.into(),
            input: json!({"command": "sleep 1000", "cd": ".", "timeout_ms": 100}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Advance clock past the timeout (100ms configured above).
    cx.executor().advance_clock(Duration::from_millis(200));
    cx.run_until_parked();

    // The thread continues after tool completion - simulate the model ending its turn
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed due to timeout
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on timeout"
    );

    // Verify we got an EndTurn (the tool completed, just with timeout)
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::EndTurn],
    );

    // Verify the tool result indicates timeout, not user stopped
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        // Dig the tool use and its matching result out of the agent message.
        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        assert!(
            result_text.contains("timed out"),
            "expected tool result to indicate timeout, got: {result_text}"
        );
        assert!(
            !result_text.contains("The user stopped"),
            "tool result should not mention user stopped when it timed out, got: {result_text}"
        );
    });
}
2232
/// Sending a second message while a completion is still streaming cancels the
/// first turn: the first event stream ends with `Cancelled`, the second with
/// `EndTurn`.
#[gpui::test]
async fn test_in_progress_send_canceled_by_next_send(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First send: stream a partial response but never finish the turn.
    let events_1 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 1!");
    cx.run_until_parked();

    // Second send while the first is still in flight; this should cancel it.
    let events_2 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 2!");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    let events_1 = events_1.collect::<Vec<_>>().await;
    assert_eq!(stop_events(events_1), vec![acp::StopReason::Cancelled]);
    let events_2 = events_2.collect::<Vec<_>>().await;
    assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
}
2263
/// A send issued after the previous turn has fully completed must not cancel
/// anything: both turns end with `EndTurn`.
#[gpui::test]
async fn test_subsequent_successful_sends_dont_cancel(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First turn runs to completion before the second send happens.
    let events_1 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 1!");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();
    let events_1 = events_1.collect::<Vec<_>>().await;

    let events_2 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 2!");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();
    let events_2 = events_2.collect::<Vec<_>>().await;

    assert_eq!(stop_events(events_1), vec![acp::StopReason::EndTurn]);
    assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
}
2296
/// A `Refusal` stop reason rolls the thread back: everything from the last
/// user message onward is removed, leaving the transcript empty here.
#[gpui::test]
async fn test_refusal(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    // Before any model output, only the user message is in the transcript.
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello
            "}
        );
    });

    fake_model.send_last_completion_stream_text_chunk("Hey!");
    cx.run_until_parked();
    // Streamed assistant text shows up incrementally.
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello

                ## Assistant

                Hey!
            "}
        );
    });

    // If the model refuses to continue, the thread should remove all the messages after the last user message.
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::Refusal));
    let events = events.collect::<Vec<_>>().await;
    assert_eq!(stop_events(events), vec![acp::StopReason::Refusal]);
    thread.read_with(cx, |thread, _| {
        assert_eq!(thread.to_markdown(), "");
    });
}
2345
/// Truncating at the first user message empties the thread and clears token
/// usage; the thread must remain usable for new sends afterwards.
#[gpui::test]
async fn test_truncate_first_message(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Keep the id so we can truncate at this exact message later.
    let message_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_id.clone(), ["Hello"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello
            "}
        );
        // No usage is reported until the model emits a UsageUpdate.
        assert_eq!(thread.latest_token_usage(), None);
    });

    fake_model.send_last_completion_stream_text_chunk("Hey!");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 32_000,
            output_tokens: 16_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello

                ## Assistant

                Hey!
            "}
        );
        // used_tokens reflects input + output from the UsageUpdate above.
        assert_eq!(
            thread.latest_token_usage(),
            Some(acp_thread::TokenUsage {
                used_tokens: 32_000 + 16_000,
                max_tokens: 1_000_000,
                output_tokens: 16_000,
            })
        );
    });

    // Truncating at the first message should wipe the transcript and usage.
    thread
        .update(cx, |thread, cx| thread.truncate(message_id, cx))
        .unwrap();
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(thread.to_markdown(), "");
        assert_eq!(thread.latest_token_usage(), None);
    });

    // Ensure we can still send a new message after truncation.
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hi"], cx)
        })
        .unwrap();
    // The user message is visible immediately, before the executor runs.
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hi
            "}
        );
    });
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Ahoy!");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 40_000,
            output_tokens: 20_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hi

                ## Assistant

                Ahoy!
            "}
        );

        // Usage tracks the new turn, not the truncated one.
        assert_eq!(
            thread.latest_token_usage(),
            Some(acp_thread::TokenUsage {
                used_tokens: 40_000 + 20_000,
                max_tokens: 1_000_000,
                output_tokens: 20_000,
            })
        );
    });
}
2463
/// Truncating at the second user message restores the thread to its state
/// after the first exchange, including the first turn's token usage.
#[gpui::test]
async fn test_truncate_second_message(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First exchange: one user message, one complete assistant response.
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Message 1 response");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 32_000,
            output_tokens: 16_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Reusable snapshot assertion: the thread as it looks after only the
    // first exchange. Checked again after truncation below.
    let assert_first_message_state = |cx: &mut TestAppContext| {
        thread.clone().read_with(cx, |thread, _| {
            assert_eq!(
                thread.to_markdown(),
                indoc! {"
                    ## User

                    Message 1

                    ## Assistant

                    Message 1 response
                "}
            );

            assert_eq!(
                thread.latest_token_usage(),
                Some(acp_thread::TokenUsage {
                    used_tokens: 32_000 + 16_000,
                    max_tokens: 1_000_000,
                    output_tokens: 16_000,
                })
            );
        });
    };

    assert_first_message_state(cx);

    // Second exchange; keep the id so we can truncate exactly here.
    let second_message_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(second_message_id.clone(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Message 2 response");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 40_000,
            output_tokens: 20_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Message 1

                ## Assistant

                Message 1 response

                ## User

                Message 2

                ## Assistant

                Message 2 response
            "}
        );

        assert_eq!(
            thread.latest_token_usage(),
            Some(acp_thread::TokenUsage {
                used_tokens: 40_000 + 20_000,
                max_tokens: 1_000_000,
                output_tokens: 20_000,
            })
        );
    });

    // Truncate at the second message: the first exchange (and its usage)
    // should be exactly what remains.
    thread
        .update(cx, |thread, cx| thread.truncate(second_message_id, cx))
        .unwrap();
    cx.run_until_parked();

    assert_first_message_state(cx);
}
2574
/// The summarization model generates a title from the first exchange (using
/// only the first line of its output); later sends must not regenerate it.
#[gpui::test]
async fn test_title_generation(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // A second fake model dedicated to summarization/title generation.
    let summary_model = Arc::new(FakeLanguageModel::default());
    thread.update(cx, |thread, cx| {
        thread.set_summarization_model(Some(summary_model.clone()), cx)
    });

    let send = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Hey!");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    // Title stays at the default until the summary model responds.
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "New Thread"));

    // Ensure the summary model has been invoked to generate a title.
    // The chunks span a newline; only the first line becomes the title.
    summary_model.send_last_completion_stream_text_chunk("Hello ");
    summary_model.send_last_completion_stream_text_chunk("world\nG");
    summary_model.send_last_completion_stream_text_chunk("oodnight Moon");
    summary_model.end_last_completion_stream();
    send.collect::<Vec<_>>().await;
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));

    // Send another message, ensuring no title is generated this time.
    let send = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello again"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey again!");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    // No pending completions on the summary model: no second title request.
    assert_eq!(summary_model.pending_completions(), Vec::new());
    send.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));
}
2620
/// When a tool use is still awaiting permission, building a new completion
/// request must omit it (and its missing result), while completed tool uses
/// are included together with their results.
#[gpui::test]
async fn test_building_request_with_pending_tools(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let _events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(ToolRequiringPermission);
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["Hey!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // This one stays pending: permission is never granted in this test.
    let permission_tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: ToolRequiringPermission::name().into(),
        raw_input: "{}".into(),
        input: json!({}),
        is_input_complete: true,
        thought_signature: None,
    };
    // This one runs to completion and should appear in the request.
    let echo_tool_use = LanguageModelToolUse {
        id: "tool_id_2".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model.send_last_completion_stream_text_chunk("Hi!");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        permission_tool_use,
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        echo_tool_use.clone(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Ensure pending tools are skipped when building a request.
    let request = thread
        .read_with(cx, |thread, cx| {
            thread.build_completion_request(CompletionIntent::EditFile, cx)
        })
        .unwrap();
    // messages[0] is the system prompt; compare everything after it.
    assert_eq!(
        request.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Hey!".into()],
                cache: true,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![
                    MessageContent::Text("Hi!".into()),
                    MessageContent::ToolUse(echo_tool_use.clone())
                ],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(LanguageModelToolResult {
                    tool_use_id: echo_tool_use.id.clone(),
                    tool_name: echo_tool_use.name,
                    is_error: false,
                    content: "test".into(),
                    output: Some("test".into())
                })],
                cache: false,
                reasoning_details: None,
            },
        ],
    );
}
2700
/// End-to-end exercise of `NativeAgentConnection`: thread creation, model
/// listing/selection, prompting, cancellation, and session teardown when the
/// ACP thread is dropped.
#[gpui::test]
async fn test_agent_connection(cx: &mut TestAppContext) {
    cx.update(settings::init);
    let templates = Templates::new();

    // Initialize language model system with test provider
    cx.update(|cx| {
        gpui_tokio::init(cx);

        let http_client = FakeHttpClient::with_404_response();
        let clock = Arc::new(clock::FakeSystemClock::new());
        let client = Client::new(clock, http_client, cx);
        let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
        language_model::init(client.clone(), cx);
        language_models::init(user_store, client.clone(), cx);
        LanguageModelRegistry::test(cx);
    });
    cx.executor().forbid_parking();

    // Create a project for new_thread
    let fake_fs = cx.update(|cx| fs::FakeFs::new(cx.background_executor().clone()));
    fake_fs.insert_tree(path!("/test"), json!({})).await;
    let project = Project::test(fake_fs.clone(), [Path::new("/test")], cx).await;
    let cwd = Path::new("/test");
    let text_thread_store =
        cx.new(|cx| assistant_text_thread::TextThreadStore::fake(project.clone(), cx));
    let history_store = cx.new(|cx| HistoryStore::new(text_thread_store, cx));

    // Create agent and connection
    let agent = NativeAgent::new(
        project.clone(),
        history_store,
        templates.clone(),
        None,
        fake_fs.clone(),
        &mut cx.to_async(),
    )
    .await
    .unwrap();
    let connection = NativeAgentConnection(agent.clone());

    // Create a thread using new_thread
    let connection_rc = Rc::new(connection.clone());
    let acp_thread = cx
        .update(|cx| connection_rc.new_thread(project, cwd, cx))
        .await
        .expect("new_thread should succeed");

    // Get the session_id from the AcpThread
    let session_id = acp_thread.read_with(cx, |thread, _| thread.session_id().clone());

    // Test model_selector returns Some
    let selector_opt = connection.model_selector(&session_id);
    assert!(
        selector_opt.is_some(),
        "agent should always support ModelSelector"
    );
    let selector = selector_opt.unwrap();

    // Test list_models
    let listed_models = cx
        .update(|cx| selector.list_models(cx))
        .await
        .expect("list_models should succeed");
    let AgentModelList::Grouped(listed_models) = listed_models else {
        panic!("Unexpected model list type");
    };
    assert!(!listed_models.is_empty(), "should have at least one model");
    assert_eq!(
        listed_models[&AgentModelGroupName("Fake".into())][0]
            .id
            .0
            .as_ref(),
        "fake/fake"
    );

    // Test selected_model returns the default
    let model = cx
        .update(|cx| selector.selected_model(cx))
        .await
        .expect("selected_model should succeed");
    let model = cx
        .update(|cx| agent.read(cx).models().model_from_id(&model.id))
        .unwrap();
    let model = model.as_fake();
    assert_eq!(model.id().0, "fake", "should return default model");

    // Round-trip a prompt through the ACP thread and the fake model.
    let request = acp_thread.update(cx, |thread, cx| thread.send(vec!["abc".into()], cx));
    cx.run_until_parked();
    model.send_last_completion_stream_text_chunk("def");
    cx.run_until_parked();
    acp_thread.read_with(cx, |thread, cx| {
        assert_eq!(
            thread.to_markdown(cx),
            indoc! {"
                ## User

                abc

                ## Assistant

                def

            "}
        )
    });

    // Test cancel
    cx.update(|cx| connection.cancel(&session_id, cx));
    request.await.expect("prompt should fail gracefully");

    // Ensure that dropping the ACP thread causes the native thread to be
    // dropped as well.
    cx.update(|_| drop(acp_thread));
    let result = cx
        .update(|cx| {
            connection.prompt(
                Some(acp_thread::UserMessageId::new()),
                acp::PromptRequest::new(session_id.clone(), vec!["ghi".into()]),
                cx,
            )
        })
        .await;
    assert_eq!(
        result.as_ref().unwrap_err().to_string(),
        "Session not found",
        "unexpected result: {:?}",
        result
    );
}
2831
/// Streaming tool input produces the expected sequence of ACP tool-call
/// events: initial ToolCall, raw-input update, InProgress, content, and
/// finally Completed with the raw output.
#[gpui::test]
async fn test_tool_updates_to_completion(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    thread.update(cx, |thread, _cx| thread.add_tool(ThinkingTool));
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Think"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Simulate streaming partial input.
    let input = json!({});
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "1".into(),
            name: ThinkingTool::name().into(),
            raw_input: input.to_string(),
            input,
            is_input_complete: false,
            thought_signature: None,
        },
    ));

    // Input streaming completed
    let input = json!({ "content": "Thinking hard!" });
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "1".into(),
            name: "thinking".into(),
            raw_input: input.to_string(),
            input,
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Initial tool call, created from the partial (empty) input.
    let tool_call = expect_tool_call(&mut events).await;
    assert_eq!(
        tool_call,
        acp::ToolCall::new("1", "Thinking")
            .kind(acp::ToolKind::Think)
            .raw_input(json!({}))
            .meta(acp::Meta::from_iter([(
                "tool_name".into(),
                "thinking".into()
            )]))
    );
    // Once input is complete, the raw input is updated in place.
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new()
                .title("Thinking")
                .kind(acp::ToolKind::Think)
                .raw_input(json!({ "content": "Thinking hard!"}))
        )
    );
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new().status(acp::ToolCallStatus::InProgress)
        )
    );
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new().content(vec!["Thinking hard!".into()])
        )
    );
    // Terminal update: the tool finished and reported its raw output.
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new()
                .status(acp::ToolCallStatus::Completed)
                .raw_output("Finished thinking.")
        )
    );
}
2922
/// A completion that succeeds on the first attempt must not emit any
/// `Retry` events, even in Burn mode where retries are enabled.
#[gpui::test]
async fn test_send_no_retry_on_success(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.send(UserMessageId::new(), ["Hello!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Hey!");
    fake_model.end_last_completion_stream();

    // Drain events until the turn stops, counting any Retry events.
    let mut retry_events = Vec::new();
    while let Some(Ok(event)) = events.next().await {
        match event {
            ThreadEvent::Retry(retry_status) => {
                retry_events.push(retry_status);
            }
            ThreadEvent::Stop(..) => break,
            _ => {}
        }
    }

    assert_eq!(retry_events.len(), 0);
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello!

                ## Assistant

                Hey!
            "}
        )
    });
}
2966
/// A retryable provider error (server overloaded) mid-stream triggers one
/// retry after `retry_after`, and the resumed turn continues in a new
/// assistant message marked with `[resume]`.
#[gpui::test]
async fn test_send_retry_on_error(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.send(UserMessageId::new(), ["Hello!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Stream part of a response, then fail with a retryable error.
    fake_model.send_last_completion_stream_text_chunk("Hey,");
    fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
        provider: LanguageModelProviderName::new("Anthropic"),
        retry_after: Some(Duration::from_secs(3)),
    });
    fake_model.end_last_completion_stream();

    // Advance past retry_after so the retry fires.
    cx.executor().advance_clock(Duration::from_secs(3));
    cx.run_until_parked();

    // The retried completion finishes the response.
    fake_model.send_last_completion_stream_text_chunk("there!");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    let mut retry_events = Vec::new();
    while let Some(Ok(event)) = events.next().await {
        match event {
            ThreadEvent::Retry(retry_status) => {
                retry_events.push(retry_status);
            }
            ThreadEvent::Stop(..) => break,
            _ => {}
        }
    }

    assert_eq!(retry_events.len(), 1);
    assert!(matches!(
        retry_events[0],
        acp_thread::RetryStatus { attempt: 1, .. }
    ));
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello!

                ## Assistant

                Hey,

                [resume]

                ## Assistant

                there!
            "}
        )
    });
}
3031
/// If a retryable error arrives after a tool use was streamed, the tool call
/// still runs to completion, and the retried request includes both the tool
/// use and its result.
#[gpui::test]
async fn test_send_retry_finishes_tool_calls_on_error(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["Call the echo tool!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use_1 = LanguageModelToolUse {
        id: "tool_1".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    // Stream the tool use, then fail the completion with a retryable error.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        tool_use_1.clone(),
    ));
    fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
        provider: LanguageModelProviderName::new("Anthropic"),
        retry_after: Some(Duration::from_secs(3)),
    });
    fake_model.end_last_completion_stream();

    cx.executor().advance_clock(Duration::from_secs(3));
    // Inspect the retried request: it must carry the tool use and result.
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Call the echo tool!".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![language_model::MessageContent::ToolUse(tool_use_1.clone())],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![language_model::MessageContent::ToolResult(
                    LanguageModelToolResult {
                        tool_use_id: tool_use_1.id.clone(),
                        tool_name: tool_use_1.name.clone(),
                        is_error: false,
                        content: "test".into(),
                        output: Some("test".into())
                    }
                )],
                cache: true,
                reasoning_details: None,
            },
        ]
    );

    fake_model.send_last_completion_stream_text_chunk("Done");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    events.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message(),
            Some(Message::Agent(AgentMessage {
                content: vec![AgentMessageContent::Text("Done".into())],
                tool_results: IndexMap::default(),
                reasoning_details: None,
            }))
        );
    })
}
3112
/// After `MAX_RETRY_ATTEMPTS` consecutive retryable failures, the thread
/// emits one Retry event per attempt and then surfaces the final error
/// instead of retrying again.
#[gpui::test]
async fn test_send_max_retries_exceeded(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.send(UserMessageId::new(), ["Hello!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Fail the initial attempt plus every retry (hence MAX + 1 iterations).
    for _ in 0..crate::thread::MAX_RETRY_ATTEMPTS + 1 {
        fake_model.send_last_completion_stream_error(
            LanguageModelCompletionError::ServerOverloaded {
                provider: LanguageModelProviderName::new("Anthropic"),
                retry_after: Some(Duration::from_secs(3)),
            },
        );
        fake_model.end_last_completion_stream();
        cx.executor().advance_clock(Duration::from_secs(3));
        cx.run_until_parked();
    }

    let mut errors = Vec::new();
    let mut retry_events = Vec::new();
    while let Some(event) = events.next().await {
        match event {
            Ok(ThreadEvent::Retry(retry_status)) => {
                retry_events.push(retry_status);
            }
            Ok(ThreadEvent::Stop(..)) => break,
            Err(error) => errors.push(error),
            _ => {}
        }
    }

    assert_eq!(
        retry_events.len(),
        crate::thread::MAX_RETRY_ATTEMPTS as usize
    );
    // Attempts are numbered starting at 1.
    for i in 0..crate::thread::MAX_RETRY_ATTEMPTS as usize {
        assert_eq!(retry_events[i].attempt, i + 1);
    }
    // The final failure is reported once, as the original provider error.
    assert_eq!(errors.len(), 1);
    let error = errors[0]
        .downcast_ref::<LanguageModelCompletionError>()
        .unwrap();
    assert!(matches!(
        error,
        LanguageModelCompletionError::ServerOverloaded { .. }
    ));
}
3167
3168/// Filters out the stop events for asserting against in tests
3169fn stop_events(result_events: Vec<Result<ThreadEvent>>) -> Vec<acp::StopReason> {
3170 result_events
3171 .into_iter()
3172 .filter_map(|event| match event.unwrap() {
3173 ThreadEvent::Stop(stop_reason) => Some(stop_reason),
3174 _ => None,
3175 })
3176 .collect()
3177}
3178
/// Fixture bundle returned by `setup`: the model and thread under test plus
/// the supporting entities some tests need to reach into.
struct ThreadTest {
    // The language model wired into the thread (fake or real, per `TestModel`).
    model: Arc<dyn LanguageModel>,
    thread: Entity<Thread>,
    // Project context shared with the thread's prompt templates.
    project_context: Entity<ProjectContext>,
    context_server_store: Entity<ContextServerStore>,
    // The fake filesystem backing the test project and settings file.
    fs: Arc<FakeFs>,
}
3186
/// Which model `setup` should install: a real registry-backed model
/// (requires authentication) or the in-process fake used by most tests.
enum TestModel {
    Sonnet4,
    Fake,
}
3191
3192impl TestModel {
3193 fn id(&self) -> LanguageModelId {
3194 match self {
3195 TestModel::Sonnet4 => LanguageModelId("claude-sonnet-4-latest".into()),
3196 TestModel::Fake => unreachable!(),
3197 }
3198 }
3199}
3200
/// Builds a [`ThreadTest`] fixture: a fake filesystem carrying a settings
/// file that enables every test tool under a dedicated profile, a test
/// project rooted at `/test`, and a `Thread` wired to either the fake model
/// or a real, authenticated Sonnet 4 model.
async fn setup(cx: &mut TestAppContext, model: TestModel) -> ThreadTest {
    // Real-model runs perform actual network I/O, which requires parking.
    cx.executor().allow_parking();

    let fs = FakeFs::new(cx.background_executor.clone());
    fs.create_dir(paths::settings_file().parent().unwrap())
        .await
        .unwrap();
    // Write the settings file; `watch_settings` below keeps the global
    // settings store in sync with its contents.
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "default_profile": "test-profile",
                "profiles": {
                    "test-profile": {
                        "name": "Test Profile",
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                            WordListTool::name(): true,
                            ToolRequiringPermission::name(): true,
                            InfiniteTool::name(): true,
                            ThinkingTool::name(): true,
                            "terminal": true,
                        }
                    }
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;

    cx.update(|cx| {
        settings::init(cx);

        match model {
            // The fake model needs no client or provider registration.
            TestModel::Fake => {}
            TestModel::Sonnet4 => {
                // Real-model runs talk to production services: set up an HTTP
                // client, a production `Client`, and the model providers.
                gpui_tokio::init(cx);
                let http_client = ReqwestClient::user_agent("agent tests").unwrap();
                cx.set_http_client(Arc::new(http_client));
                let client = Client::production(cx);
                let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
                language_model::init(client.clone(), cx);
                language_models::init(user_store, client.clone(), cx);
            }
        };

        watch_settings(fs.clone(), cx);
    });

    let templates = Templates::new();

    fs.insert_tree(path!("/test"), json!({})).await;
    let project = Project::test(fs.clone(), [path!("/test").as_ref()], cx).await;

    let model = cx
        .update(|cx| {
            if let TestModel::Fake = model {
                Task::ready(Arc::new(FakeLanguageModel::default()) as Arc<_>)
            } else {
                // Look up the real model in the registry and authenticate its
                // provider before handing it to the thread.
                let model_id = model.id();
                let models = LanguageModelRegistry::read_global(cx);
                let model = models
                    .available_models(cx)
                    .find(|model| model.id() == model_id)
                    .unwrap();

                let provider = models.provider(&model.provider_id()).unwrap();
                let authenticated = provider.authenticate(cx);

                cx.spawn(async move |_cx| {
                    authenticated.await.unwrap();
                    model
                })
            }
        })
        .await;

    let project_context = cx.new(|_cx| ProjectContext::default());
    let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
    let context_server_registry =
        cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
    let thread = cx.new(|cx| {
        Thread::new(
            project,
            project_context.clone(),
            context_server_registry,
            templates,
            Some(model.clone()),
            cx,
        )
    });
    ThreadTest {
        model,
        thread,
        project_context,
        context_server_store,
        fs,
    }
}
3303
/// Runs once at test-binary startup (via `ctor`) and installs `env_logger`,
/// but only when the caller opted in by setting `RUST_LOG`.
#[cfg(test)]
#[ctor::ctor]
fn init_logger() {
    if std::env::var("RUST_LOG").is_err() {
        return;
    }
    env_logger::init();
}
3311
/// Spawns a detached background task that watches the settings file on `fs`
/// and applies each new version of its contents to the global
/// `SettingsStore`, mirroring how settings are reloaded at runtime.
fn watch_settings(fs: Arc<dyn Fs>, cx: &mut App) {
    let fs = fs.clone();
    cx.spawn({
        async move |cx| {
            let mut new_settings_content_rx = settings::watch_config_file(
                cx.background_executor(),
                fs,
                paths::settings_file().clone(),
            );

            while let Some(new_settings_content) = new_settings_content_rx.next().await {
                cx.update(|cx| {
                    SettingsStore::update_global(cx, |settings, cx| {
                        settings.set_user_settings(&new_settings_content, cx)
                    })
                })
                // Ignore update errors — presumably the app context can be
                // gone by the time a change arrives (TODO confirm).
                .ok();
            }
        }
    })
    .detach();
}
3334
3335fn tool_names_for_completion(completion: &LanguageModelRequest) -> Vec<String> {
3336 completion
3337 .tools
3338 .iter()
3339 .map(|tool| tool.name.clone())
3340 .collect()
3341}
3342
/// Registers a fake stdio MCP context server named `name` that exposes
/// `tools`, starts it in `context_server_store`, and returns a channel that
/// yields each incoming tool call together with a one-shot sender the test
/// uses to deliver that call's response.
fn setup_context_server(
    name: &'static str,
    tools: Vec<context_server::types::Tool>,
    context_server_store: &Entity<ContextServerStore>,
    cx: &mut TestAppContext,
) -> mpsc::UnboundedReceiver<(
    context_server::types::CallToolParams,
    oneshot::Sender<context_server::types::CallToolResponse>,
)> {
    // Declare the server in project settings so the store is willing to run it.
    cx.update(|cx| {
        let mut settings = ProjectSettings::get_global(cx).clone();
        settings.context_servers.insert(
            name.into(),
            project::project_settings::ContextServerSettings::Stdio {
                enabled: true,
                command: ContextServerCommand {
                    path: "somebinary".into(),
                    args: Vec::new(),
                    env: None,
                    timeout: None,
                },
            },
        );
        ProjectSettings::override_global(settings, cx);
    });

    let (mcp_tool_calls_tx, mcp_tool_calls_rx) = mpsc::unbounded();
    let fake_transport = context_server::test::create_fake_transport(name, cx.executor())
        // Handshake: advertise tool support so the client will list tools.
        .on_request::<context_server::types::requests::Initialize, _>(move |_params| async move {
            context_server::types::InitializeResponse {
                protocol_version: context_server::types::ProtocolVersion(
                    context_server::types::LATEST_PROTOCOL_VERSION.to_string(),
                ),
                server_info: context_server::types::Implementation {
                    name: name.into(),
                    version: "1.0.0".to_string(),
                },
                capabilities: context_server::types::ServerCapabilities {
                    tools: Some(context_server::types::ToolsCapabilities {
                        list_changed: Some(true),
                    }),
                    ..Default::default()
                },
                meta: None,
            }
        })
        // Serve the caller-provided tool list verbatim.
        .on_request::<context_server::types::requests::ListTools, _>(move |_params| {
            let tools = tools.clone();
            async move {
                context_server::types::ListToolsResponse {
                    tools,
                    next_cursor: None,
                    meta: None,
                }
            }
        })
        // Forward each tool call to the test over the channel and block the
        // request until the test sends a response through the one-shot.
        .on_request::<context_server::types::requests::CallTool, _>(move |params| {
            let mcp_tool_calls_tx = mcp_tool_calls_tx.clone();
            async move {
                let (response_tx, response_rx) = oneshot::channel();
                mcp_tool_calls_tx
                    .unbounded_send((params, response_tx))
                    .unwrap();
                response_rx.await.unwrap()
            }
        });
    context_server_store.update(cx, |store, cx| {
        store.start_server(
            Arc::new(ContextServer::new(
                ContextServerId(name.into()),
                Arc::new(fake_transport),
            )),
            cx,
        );
    });
    // Let the server finish its startup exchange before tests interact with it.
    cx.run_until_parked();
    mcp_tool_calls_rx
}
3421
/// `tokens_before_message` should report the `input_tokens` of the request
/// that preceded a given user message, and `None` for the first message
/// (which has no prior request).
#[gpui::test]
async fn test_tokens_before_message(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First message
    let message_1_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_1_id.clone(), ["First message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Before any response, tokens_before_message should return None for first message
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should have no tokens before it"
        );
    });

    // Complete first message with usage
    fake_model.send_last_completion_stream_text_chunk("Response 1");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // First message still has no tokens before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should still have no tokens before it after response"
        );
    });

    // Second message
    let message_2_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_2_id.clone(), ["Second message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Second message should have first message's input tokens before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            Some(100),
            "Second message should have 100 tokens before it (from first request)"
        );
    });

    // Complete second message
    fake_model.send_last_completion_stream_text_chunk("Response 2");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 250, // Total for this request (includes previous context)
            output_tokens: 75,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Third message
    let message_3_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_3_id.clone(), ["Third message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Third message should have second message's input tokens (250) before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_3_id),
            Some(250),
            "Third message should have 250 tokens before it (from second request)"
        );
        // Second message should still have 100
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            Some(100),
            "Second message should still have 100 tokens before it"
        );
        // First message still has none
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should still have no tokens before it"
        );
    });
}
3528
/// Truncating the thread at a message should remove that message (and its
/// recorded token data), so `tokens_before_message` returns `None` for it
/// afterwards while earlier messages are unaffected.
#[gpui::test]
async fn test_tokens_before_message_after_truncate(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Set up three messages with responses
    let message_1_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_1_id.clone(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Response 1");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    let message_2_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_2_id.clone(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Response 2");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 250,
            output_tokens: 75,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Verify initial state
    thread.read_with(cx, |thread, _| {
        assert_eq!(thread.tokens_before_message(&message_2_id), Some(100));
    });

    // Truncate at message 2 (removes message 2 and everything after)
    thread
        .update(cx, |thread, cx| thread.truncate(message_2_id.clone(), cx))
        .unwrap();
    cx.run_until_parked();

    // After truncation, message_2_id no longer exists, so lookup should return None
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            None,
            "After truncation, message 2 no longer exists"
        );
        // Message 1 still exists but has no tokens before it
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message still has no tokens before it"
        );
    });
}