1use super::*;
2use acp_thread::{AgentConnection, AgentModelGroupName, AgentModelList, UserMessageId};
3use agent_client_protocol::{self as acp};
4use agent_settings::AgentProfileId;
5use anyhow::Result;
6use client::{Client, UserStore};
7use cloud_llm_client::CompletionIntent;
8use collections::IndexMap;
9use context_server::{ContextServer, ContextServerCommand, ContextServerId};
10use fs::{FakeFs, Fs};
11use futures::{
12 FutureExt as _, StreamExt,
13 channel::{
14 mpsc::{self, UnboundedReceiver},
15 oneshot,
16 },
17 future::{Fuse, Shared},
18};
19use gpui::{
20 App, AppContext, AsyncApp, Entity, Task, TestAppContext, UpdateGlobal,
21 http_client::FakeHttpClient,
22};
23use indoc::indoc;
24use language_model::{
25 LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent, LanguageModelId,
26 LanguageModelProviderName, LanguageModelRegistry, LanguageModelRequest,
27 LanguageModelRequestMessage, LanguageModelToolResult, LanguageModelToolSchemaFormat,
28 LanguageModelToolUse, MessageContent, Role, StopReason, fake_provider::FakeLanguageModel,
29};
30use pretty_assertions::assert_eq;
31use project::{
32 Project, context_server_store::ContextServerStore, project_settings::ProjectSettings,
33};
34use prompt_store::ProjectContext;
35use reqwest_client::ReqwestClient;
36use schemars::JsonSchema;
37use serde::{Deserialize, Serialize};
38use serde_json::json;
39use settings::{Settings, SettingsStore};
40use std::{
41 path::Path,
42 pin::Pin,
43 rc::Rc,
44 sync::{
45 Arc,
46 atomic::{AtomicBool, Ordering},
47 },
48 time::Duration,
49};
50use util::path;
51
52mod test_tools;
53use test_tools::*;
54
55fn init_test(cx: &mut TestAppContext) {
56 cx.update(|cx| {
57 let settings_store = SettingsStore::test(cx);
58 cx.set_global(settings_store);
59 });
60}
61
/// Test double for a terminal: records kill/stop state and exposes a
/// `wait_for_exit` task that resolves only when the internal oneshot fires.
struct FakeTerminalHandle {
    // Set to true by `kill()`.
    killed: Arc<AtomicBool>,
    // Whether the user (rather than e.g. a timeout) stopped the terminal.
    stopped_by_user: Arc<AtomicBool>,
    // Oneshot sender consumed by `signal_exit` to resolve `wait_for_exit`.
    exit_sender: std::cell::RefCell<Option<futures::channel::oneshot::Sender<()>>>,
    wait_for_exit: Shared<Task<acp::TerminalExitStatus>>,
    // Canned output returned by `current_output`.
    output: acp::TerminalOutputResponse,
    id: acp::TerminalId,
}
70
71impl FakeTerminalHandle {
72 fn new_never_exits(cx: &mut App) -> Self {
73 let killed = Arc::new(AtomicBool::new(false));
74 let stopped_by_user = Arc::new(AtomicBool::new(false));
75
76 let (exit_sender, exit_receiver) = futures::channel::oneshot::channel();
77
78 let wait_for_exit = cx
79 .spawn(async move |_cx| {
80 // Wait for the exit signal (sent when kill() is called)
81 let _ = exit_receiver.await;
82 acp::TerminalExitStatus::new()
83 })
84 .shared();
85
86 Self {
87 killed,
88 stopped_by_user,
89 exit_sender: std::cell::RefCell::new(Some(exit_sender)),
90 wait_for_exit,
91 output: acp::TerminalOutputResponse::new("partial output".to_string(), false),
92 id: acp::TerminalId::new("fake_terminal".to_string()),
93 }
94 }
95
96 fn was_killed(&self) -> bool {
97 self.killed.load(Ordering::SeqCst)
98 }
99
100 fn set_stopped_by_user(&self, stopped: bool) {
101 self.stopped_by_user.store(stopped, Ordering::SeqCst);
102 }
103
104 fn signal_exit(&self) {
105 if let Some(sender) = self.exit_sender.borrow_mut().take() {
106 let _ = sender.send(());
107 }
108 }
109}
110
impl crate::TerminalHandle for FakeTerminalHandle {
    // Returns the fixed id assigned at construction.
    fn id(&self, _cx: &AsyncApp) -> Result<acp::TerminalId> {
        Ok(self.id.clone())
    }

    // Returns the canned output ("partial output") regardless of command.
    fn current_output(&self, _cx: &AsyncApp) -> Result<acp::TerminalOutputResponse> {
        Ok(self.output.clone())
    }

    // Clones the shared exit task; it resolves only after `signal_exit`/`kill`.
    fn wait_for_exit(&self, _cx: &AsyncApp) -> Result<Shared<Task<acp::TerminalExitStatus>>> {
        Ok(self.wait_for_exit.clone())
    }

    // Records the kill and unblocks any `wait_for_exit` waiters.
    fn kill(&self, _cx: &AsyncApp) -> Result<()> {
        self.killed.store(true, Ordering::SeqCst);
        self.signal_exit();
        Ok(())
    }

    fn was_stopped_by_user(&self, _cx: &AsyncApp) -> Result<bool> {
        Ok(self.stopped_by_user.load(Ordering::SeqCst))
    }
}
134
/// Environment that always hands out the same pre-built fake terminal handle.
struct FakeThreadEnvironment {
    handle: Rc<FakeTerminalHandle>,
}
138
139impl crate::ThreadEnvironment for FakeThreadEnvironment {
140 fn create_terminal(
141 &self,
142 _command: String,
143 _cwd: Option<std::path::PathBuf>,
144 _output_byte_limit: Option<u64>,
145 _cx: &mut AsyncApp,
146 ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
147 Task::ready(Ok(self.handle.clone() as Rc<dyn crate::TerminalHandle>))
148 }
149}
150
/// Environment that creates multiple independent terminal handles for testing concurrent terminals.
struct MultiTerminalEnvironment {
    // Every handle created so far, in creation order.
    handles: std::cell::RefCell<Vec<Rc<FakeTerminalHandle>>>,
}
155
156impl MultiTerminalEnvironment {
157 fn new() -> Self {
158 Self {
159 handles: std::cell::RefCell::new(Vec::new()),
160 }
161 }
162
163 fn handles(&self) -> Vec<Rc<FakeTerminalHandle>> {
164 self.handles.borrow().clone()
165 }
166}
167
168impl crate::ThreadEnvironment for MultiTerminalEnvironment {
169 fn create_terminal(
170 &self,
171 _command: String,
172 _cwd: Option<std::path::PathBuf>,
173 _output_byte_limit: Option<u64>,
174 cx: &mut AsyncApp,
175 ) -> Task<Result<Rc<dyn crate::TerminalHandle>>> {
176 let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
177 self.handles.borrow_mut().push(handle.clone());
178 Task::ready(Ok(handle as Rc<dyn crate::TerminalHandle>))
179 }
180}
181
182fn always_allow_tools(cx: &mut TestAppContext) {
183 cx.update(|cx| {
184 let mut settings = agent_settings::AgentSettings::get_global(cx).clone();
185 settings.always_allow_tool_actions = true;
186 agent_settings::AgentSettings::override_global(settings, cx);
187 });
188}
189
190#[gpui::test]
191async fn test_echo(cx: &mut TestAppContext) {
192 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
193 let fake_model = model.as_fake();
194
195 let events = thread
196 .update(cx, |thread, cx| {
197 thread.send(UserMessageId::new(), ["Testing: Reply with 'Hello'"], cx)
198 })
199 .unwrap();
200 cx.run_until_parked();
201 fake_model.send_last_completion_stream_text_chunk("Hello");
202 fake_model
203 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
204 fake_model.end_last_completion_stream();
205
206 let events = events.collect().await;
207 thread.update(cx, |thread, _cx| {
208 assert_eq!(
209 thread.last_message().unwrap().to_markdown(),
210 indoc! {"
211 ## Assistant
212
213 Hello
214 "}
215 )
216 });
217 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
218}
219
/// A terminal tool call with a tiny `timeout_ms` must kill the terminal
/// handle and still surface the partial output in the tool result.
#[gpui::test]
async fn test_terminal_tool_timeout_kills_handle(cx: &mut TestAppContext) {
    init_test(cx);
    always_allow_tools(cx);

    let fs = FakeFs::new(cx.executor());
    let project = Project::test(fs, [], cx).await;

    // Terminal that never exits on its own; it only resolves once killed.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    #[allow(clippy::arc_with_non_send_sync)]
    let tool = Arc::new(crate::TerminalTool::new(project, environment));
    let (event_stream, mut rx) = crate::ToolCallEventStream::test();

    // Run with a very short timeout so the kill path triggers quickly.
    let task = cx.update(|cx| {
        tool.run(
            crate::TerminalToolInput {
                command: "sleep 1000".to_string(),
                cd: ".".to_string(),
                timeout_ms: Some(5),
            },
            event_stream,
            cx,
        )
    });

    // The first tool-call update should embed the terminal in its content.
    let update = rx.expect_update_fields().await;
    assert!(
        update.content.iter().any(|blocks| {
            blocks
                .iter()
                .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
        }),
        "expected tool call update to include terminal content"
    );

    let mut task_future: Pin<Box<Fuse<Task<Result<String>>>>> = Box::pin(task.fuse());

    // Poll the task until it completes, bounded by a wall-clock deadline so a
    // regression can't hang the test forever.
    let deadline = std::time::Instant::now() + Duration::from_millis(500);
    loop {
        if let Some(result) = task_future.as_mut().now_or_never() {
            let result = result.expect("terminal tool task should complete");

            assert!(
                handle.was_killed(),
                "expected terminal handle to be killed on timeout"
            );
            assert!(
                result.contains("partial output"),
                "expected result to include terminal output, got: {result}"
            );
            return;
        }

        if std::time::Instant::now() >= deadline {
            panic!("timed out waiting for terminal tool task to complete");
        }

        // Let pending foreground work and timers make progress between polls.
        cx.run_until_parked();
        cx.background_executor.timer(Duration::from_millis(1)).await;
    }
}
285
/// Without a `timeout_ms`, the terminal handle must NOT be killed while the
/// command is still running.
// NOTE(review): marked #[ignore]; it waits on a real 25ms wall-clock timer —
// presumably ignored for flakiness/timing reasons, confirm before re-enabling.
#[gpui::test]
#[ignore]
async fn test_terminal_tool_without_timeout_does_not_kill_handle(cx: &mut TestAppContext) {
    init_test(cx);
    always_allow_tools(cx);

    let fs = FakeFs::new(cx.executor());
    let project = Project::test(fs, [], cx).await;

    // Terminal that never exits on its own.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    #[allow(clippy::arc_with_non_send_sync)]
    let tool = Arc::new(crate::TerminalTool::new(project, environment));
    let (event_stream, mut rx) = crate::ToolCallEventStream::test();

    // No timeout: the task is intentionally left running for the whole test.
    let _task = cx.update(|cx| {
        tool.run(
            crate::TerminalToolInput {
                command: "sleep 1000".to_string(),
                cd: ".".to_string(),
                timeout_ms: None,
            },
            event_stream,
            cx,
        )
    });

    // The first tool-call update should embed the terminal in its content.
    let update = rx.expect_update_fields().await;
    assert!(
        update.content.iter().any(|blocks| {
            blocks
                .iter()
                .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
        }),
        "expected tool call update to include terminal content"
    );

    // Give the tool a moment to (incorrectly) time out if it were going to.
    smol::Timer::after(Duration::from_millis(25)).await;

    assert!(
        !handle.was_killed(),
        "did not expect terminal handle to be killed without a timeout"
    );
}
333
334#[gpui::test]
335async fn test_thinking(cx: &mut TestAppContext) {
336 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
337 let fake_model = model.as_fake();
338
339 let events = thread
340 .update(cx, |thread, cx| {
341 thread.send(
342 UserMessageId::new(),
343 [indoc! {"
344 Testing:
345
346 Generate a thinking step where you just think the word 'Think',
347 and have your final answer be 'Hello'
348 "}],
349 cx,
350 )
351 })
352 .unwrap();
353 cx.run_until_parked();
354 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Thinking {
355 text: "Think".to_string(),
356 signature: None,
357 });
358 fake_model.send_last_completion_stream_text_chunk("Hello");
359 fake_model
360 .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
361 fake_model.end_last_completion_stream();
362
363 let events = events.collect().await;
364 thread.update(cx, |thread, _cx| {
365 assert_eq!(
366 thread.last_message().unwrap().to_markdown(),
367 indoc! {"
368 ## Assistant
369
370 <think>Think</think>
371 Hello
372 "}
373 )
374 });
375 assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
376}
377
378#[gpui::test]
379async fn test_system_prompt(cx: &mut TestAppContext) {
380 let ThreadTest {
381 model,
382 thread,
383 project_context,
384 ..
385 } = setup(cx, TestModel::Fake).await;
386 let fake_model = model.as_fake();
387
388 project_context.update(cx, |project_context, _cx| {
389 project_context.shell = "test-shell".into()
390 });
391 thread.update(cx, |thread, _| thread.add_tool(EchoTool));
392 thread
393 .update(cx, |thread, cx| {
394 thread.send(UserMessageId::new(), ["abc"], cx)
395 })
396 .unwrap();
397 cx.run_until_parked();
398 let mut pending_completions = fake_model.pending_completions();
399 assert_eq!(
400 pending_completions.len(),
401 1,
402 "unexpected pending completions: {:?}",
403 pending_completions
404 );
405
406 let pending_completion = pending_completions.pop().unwrap();
407 assert_eq!(pending_completion.messages[0].role, Role::System);
408
409 let system_message = &pending_completion.messages[0];
410 let system_prompt = system_message.content[0].to_str().unwrap();
411 assert!(
412 system_prompt.contains("test-shell"),
413 "unexpected system message: {:?}",
414 system_message
415 );
416 assert!(
417 system_prompt.contains("## Fixing Diagnostics"),
418 "unexpected system message: {:?}",
419 system_message
420 );
421}
422
423#[gpui::test]
424async fn test_system_prompt_without_tools(cx: &mut TestAppContext) {
425 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
426 let fake_model = model.as_fake();
427
428 thread
429 .update(cx, |thread, cx| {
430 thread.send(UserMessageId::new(), ["abc"], cx)
431 })
432 .unwrap();
433 cx.run_until_parked();
434 let mut pending_completions = fake_model.pending_completions();
435 assert_eq!(
436 pending_completions.len(),
437 1,
438 "unexpected pending completions: {:?}",
439 pending_completions
440 );
441
442 let pending_completion = pending_completions.pop().unwrap();
443 assert_eq!(pending_completion.messages[0].role, Role::System);
444
445 let system_message = &pending_completion.messages[0];
446 let system_prompt = system_message.content[0].to_str().unwrap();
447 assert!(
448 !system_prompt.contains("## Tool Use"),
449 "unexpected system message: {:?}",
450 system_message
451 );
452 assert!(
453 !system_prompt.contains("## Fixing Diagnostics"),
454 "unexpected system message: {:?}",
455 system_message
456 );
457}
458
/// Verifies the cache-marking policy: in every request, only the trailing
/// message carries `cache: true`; all earlier messages are uncached.
#[gpui::test]
async fn test_prompt_caching(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Send initial user message and verify it's cached
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let completion = fake_model.pending_completions().pop().unwrap();
    // messages[0] is the system prompt; compare everything after it.
    assert_eq!(
        completion.messages[1..],
        vec![LanguageModelRequestMessage {
            role: Role::User,
            content: vec!["Message 1".into()],
            cache: true,
            reasoning_details: None,
        }]
    );
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
        "Response to Message 1".into(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Send another user message and verify only the latest is cached
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 2".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text(
        "Response to Message 2".into(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Simulate a tool call and verify that the latest tool result is cached
    thread.update(cx, |thread, _| thread.add_tool(EchoTool));
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Use the echo tool"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use = LanguageModelToolUse {
        id: "tool_1".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    let completion = fake_model.pending_completions().pop().unwrap();
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "test".into(),
        output: Some("test".into()),
    };
    // Only the trailing tool-result message is marked cacheable.
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 1".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Message 2".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec!["Response to Message 2".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Use the echo tool".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
}
604
/// End-to-end (real Sonnet 4 model, only runs with the "e2e" feature):
/// exercises tool calls that finish before and after streaming stops.
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_basic_tool_calls(cx: &mut TestAppContext) {
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // Test a tool call that's likely to complete *before* streaming stops.
    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(
                UserMessageId::new(),
                ["Now test the echo tool with 'Hello'. Does it work? Say 'Yes' or 'No'."],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);

    // Test a tool call that's likely to complete *after* streaming stops.
    let events = thread
        .update(cx, |thread, cx| {
            thread.remove_tool(&EchoTool::name());
            thread.add_tool(DelayTool);
            thread.send(
                UserMessageId::new(),
                [
                    "Now call the delay tool with 200ms.",
                    "When the timer goes off, then you echo the output of the tool.",
                ],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
    // The delay tool's output ("Ding") must appear in the assistant's reply.
    thread.update(cx, |thread, _cx| {
        assert!(
            thread
                .last_message()
                .unwrap()
                .as_agent_message()
                .unwrap()
                .content
                .iter()
                .any(|content| {
                    if let AgentMessageContent::Text(text) = content {
                        text.contains("Ding")
                    } else {
                        false
                    }
                }),
            "{}",
            thread.to_markdown()
        );
    });
}
664
/// End-to-end (real model): while a tool call streams in, the thread's last
/// message should at some point hold a tool use with incomplete input.
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_streaming_tool_calls(cx: &mut TestAppContext) {
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // Test a tool call that's likely to complete *before* streaming stops.
    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(WordListTool);
            thread.send(UserMessageId::new(), ["Test the word_list tool."], cx)
        })
        .unwrap();

    let mut saw_partial_tool_use = false;
    while let Some(event) = events.next().await {
        if let Ok(ThreadEvent::ToolCall(tool_call)) = event {
            thread.update(cx, |thread, _cx| {
                // Look for a tool use in the thread's last message
                let message = thread.last_message().unwrap();
                let agent_message = message.as_agent_message().unwrap();
                let last_content = agent_message.content.last().unwrap();
                if let AgentMessageContent::ToolUse(last_tool_use) = last_content {
                    assert_eq!(last_tool_use.name.as_ref(), "word_list");
                    if tool_call.status == acp::ToolCallStatus::Pending {
                        // While pending, input may still be streaming: missing
                        // the final key "g" means we observed a partial state.
                        if !last_tool_use.is_input_complete
                            && last_tool_use.input.get("g").is_none()
                        {
                            saw_partial_tool_use = true;
                        }
                    } else {
                        last_tool_use
                            .input
                            .get("a")
                            .expect("'a' has streamed because input is now complete");
                        last_tool_use
                            .input
                            .get("g")
                            .expect("'g' has streamed because input is now complete");
                    }
                } else {
                    panic!("last content should be a tool use");
                }
            });
        }
    }

    assert!(
        saw_partial_tool_use,
        "should see at least one partially streamed tool use in the history"
    );
}
716
717#[gpui::test]
718async fn test_tool_authorization(cx: &mut TestAppContext) {
719 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
720 let fake_model = model.as_fake();
721
722 let mut events = thread
723 .update(cx, |thread, cx| {
724 thread.add_tool(ToolRequiringPermission);
725 thread.send(UserMessageId::new(), ["abc"], cx)
726 })
727 .unwrap();
728 cx.run_until_parked();
729 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
730 LanguageModelToolUse {
731 id: "tool_id_1".into(),
732 name: ToolRequiringPermission::name().into(),
733 raw_input: "{}".into(),
734 input: json!({}),
735 is_input_complete: true,
736 thought_signature: None,
737 },
738 ));
739 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
740 LanguageModelToolUse {
741 id: "tool_id_2".into(),
742 name: ToolRequiringPermission::name().into(),
743 raw_input: "{}".into(),
744 input: json!({}),
745 is_input_complete: true,
746 thought_signature: None,
747 },
748 ));
749 fake_model.end_last_completion_stream();
750 let tool_call_auth_1 = next_tool_call_authorization(&mut events).await;
751 let tool_call_auth_2 = next_tool_call_authorization(&mut events).await;
752
753 // Approve the first
754 tool_call_auth_1
755 .response
756 .send(tool_call_auth_1.options[1].option_id.clone())
757 .unwrap();
758 cx.run_until_parked();
759
760 // Reject the second
761 tool_call_auth_2
762 .response
763 .send(tool_call_auth_1.options[2].option_id.clone())
764 .unwrap();
765 cx.run_until_parked();
766
767 let completion = fake_model.pending_completions().pop().unwrap();
768 let message = completion.messages.last().unwrap();
769 assert_eq!(
770 message.content,
771 vec![
772 language_model::MessageContent::ToolResult(LanguageModelToolResult {
773 tool_use_id: tool_call_auth_1.tool_call.tool_call_id.0.to_string().into(),
774 tool_name: ToolRequiringPermission::name().into(),
775 is_error: false,
776 content: "Allowed".into(),
777 output: Some("Allowed".into())
778 }),
779 language_model::MessageContent::ToolResult(LanguageModelToolResult {
780 tool_use_id: tool_call_auth_2.tool_call.tool_call_id.0.to_string().into(),
781 tool_name: ToolRequiringPermission::name().into(),
782 is_error: true,
783 content: "Permission to run tool denied by user".into(),
784 output: Some("Permission to run tool denied by user".into())
785 })
786 ]
787 );
788
789 // Simulate yet another tool call.
790 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
791 LanguageModelToolUse {
792 id: "tool_id_3".into(),
793 name: ToolRequiringPermission::name().into(),
794 raw_input: "{}".into(),
795 input: json!({}),
796 is_input_complete: true,
797 thought_signature: None,
798 },
799 ));
800 fake_model.end_last_completion_stream();
801
802 // Respond by always allowing tools.
803 let tool_call_auth_3 = next_tool_call_authorization(&mut events).await;
804 tool_call_auth_3
805 .response
806 .send(tool_call_auth_3.options[0].option_id.clone())
807 .unwrap();
808 cx.run_until_parked();
809 let completion = fake_model.pending_completions().pop().unwrap();
810 let message = completion.messages.last().unwrap();
811 assert_eq!(
812 message.content,
813 vec![language_model::MessageContent::ToolResult(
814 LanguageModelToolResult {
815 tool_use_id: tool_call_auth_3.tool_call.tool_call_id.0.to_string().into(),
816 tool_name: ToolRequiringPermission::name().into(),
817 is_error: false,
818 content: "Allowed".into(),
819 output: Some("Allowed".into())
820 }
821 )]
822 );
823
824 // Simulate a final tool call, ensuring we don't trigger authorization.
825 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
826 LanguageModelToolUse {
827 id: "tool_id_4".into(),
828 name: ToolRequiringPermission::name().into(),
829 raw_input: "{}".into(),
830 input: json!({}),
831 is_input_complete: true,
832 thought_signature: None,
833 },
834 ));
835 fake_model.end_last_completion_stream();
836 cx.run_until_parked();
837 let completion = fake_model.pending_completions().pop().unwrap();
838 let message = completion.messages.last().unwrap();
839 assert_eq!(
840 message.content,
841 vec![language_model::MessageContent::ToolResult(
842 LanguageModelToolResult {
843 tool_use_id: "tool_id_4".into(),
844 tool_name: ToolRequiringPermission::name().into(),
845 is_error: false,
846 content: "Allowed".into(),
847 output: Some("Allowed".into())
848 }
849 )]
850 );
851}
852
853#[gpui::test]
854async fn test_tool_hallucination(cx: &mut TestAppContext) {
855 let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
856 let fake_model = model.as_fake();
857
858 let mut events = thread
859 .update(cx, |thread, cx| {
860 thread.send(UserMessageId::new(), ["abc"], cx)
861 })
862 .unwrap();
863 cx.run_until_parked();
864 fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
865 LanguageModelToolUse {
866 id: "tool_id_1".into(),
867 name: "nonexistent_tool".into(),
868 raw_input: "{}".into(),
869 input: json!({}),
870 is_input_complete: true,
871 thought_signature: None,
872 },
873 ));
874 fake_model.end_last_completion_stream();
875
876 let tool_call = expect_tool_call(&mut events).await;
877 assert_eq!(tool_call.title, "nonexistent_tool");
878 assert_eq!(tool_call.status, acp::ToolCallStatus::Pending);
879 let update = expect_tool_call_update_fields(&mut events).await;
880 assert_eq!(update.fields.status, Some(acp::ToolCallStatus::Failed));
881}
882
/// After a `ToolUseLimitReached` error ends a turn, `resume` should append a
/// "Continue where you left off" user message and let the model finish.
#[gpui::test]
async fn test_resume_after_tool_use_limit(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: EchoTool::name().into(),
        raw_input: "{}".into(),
        input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.end_last_completion_stream();

    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_id_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "def".into(),
        output: Some("def".into()),
    };
    // The follow-up request carries the tool use and its result; only the
    // trailing message is marked cacheable.
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use.clone())],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result.clone())],
                cache: true,
                reasoning_details: None,
            },
        ]
    );

    // Simulate reaching tool use limit.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
    fake_model.end_last_completion_stream();
    let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
    assert!(
        last_event
            .unwrap_err()
            .is::<language_model::ToolUseLimitReachedError>()
    );

    // Resuming appends a synthetic user message asking the model to continue.
    let events = thread.update(cx, |thread, cx| thread.resume(cx)).unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Continue where you left off".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );

    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::Text("Done".into()));
    fake_model.end_last_completion_stream();
    events.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message().unwrap().to_markdown(),
            indoc! {"
                ## Assistant

                Done
            "}
        )
    });
}
997
/// Sending a fresh user message after a `ToolUseLimitReached` error should
/// start a new request that includes the prior tool use/result history.
#[gpui::test]
async fn test_send_after_tool_use_limit(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["abc"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: EchoTool::name().into(),
        raw_input: "{}".into(),
        input: serde_json::to_value(&EchoToolInput { text: "def".into() }).unwrap(),
        is_input_complete: true,
        thought_signature: None,
    };
    let tool_result = LanguageModelToolResult {
        tool_use_id: "tool_id_1".into(),
        tool_name: EchoTool::name().into(),
        is_error: false,
        content: "def".into(),
        output: Some("def".into()),
    };
    // Stream a tool use, then immediately hit the tool-use limit.
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use.clone()));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUseLimitReached);
    fake_model.end_last_completion_stream();
    let last_event = events.collect::<Vec<_>>().await.pop().unwrap();
    assert!(
        last_event
            .unwrap_err()
            .is::<language_model::ToolUseLimitReachedError>()
    );

    // A new user message starts a request containing the full history; only
    // the trailing message is marked cacheable.
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), vec!["ghi"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["abc".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![MessageContent::ToolUse(tool_use)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(tool_result)],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["ghi".into()],
                cache: true,
                reasoning_details: None,
            }
        ]
    );
}
1074
1075async fn expect_tool_call(events: &mut UnboundedReceiver<Result<ThreadEvent>>) -> acp::ToolCall {
1076 let event = events
1077 .next()
1078 .await
1079 .expect("no tool call authorization event received")
1080 .unwrap();
1081 match event {
1082 ThreadEvent::ToolCall(tool_call) => tool_call,
1083 event => {
1084 panic!("Unexpected event {event:?}");
1085 }
1086 }
1087}
1088
1089async fn expect_tool_call_update_fields(
1090 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1091) -> acp::ToolCallUpdate {
1092 let event = events
1093 .next()
1094 .await
1095 .expect("no tool call authorization event received")
1096 .unwrap();
1097 match event {
1098 ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(update)) => update,
1099 event => {
1100 panic!("Unexpected event {event:?}");
1101 }
1102 }
1103}
1104
1105async fn next_tool_call_authorization(
1106 events: &mut UnboundedReceiver<Result<ThreadEvent>>,
1107) -> ToolCallAuthorization {
1108 loop {
1109 let event = events
1110 .next()
1111 .await
1112 .expect("no tool call authorization event received")
1113 .unwrap();
1114 if let ThreadEvent::ToolCallAuthorization(tool_call_authorization) = event {
1115 let permission_kinds = tool_call_authorization
1116 .options
1117 .iter()
1118 .map(|o| o.kind)
1119 .collect::<Vec<_>>();
1120 assert_eq!(
1121 permission_kinds,
1122 vec![
1123 acp::PermissionOptionKind::AllowAlways,
1124 acp::PermissionOptionKind::AllowOnce,
1125 acp::PermissionOptionKind::RejectOnce,
1126 ]
1127 );
1128 return tool_call_authorization;
1129 }
1130 }
1131}
1132
/// E2E test (only runs with the "e2e" feature): asks a real Sonnet 4 model to
/// invoke the delay tool twice in one message and verifies the turn completes
/// with `EndTurn` and the final assistant text mentions the tool output.
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_concurrent_tool_calls(cx: &mut TestAppContext) {
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    // Test concurrent tool calls with different delay times
    let events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(DelayTool);
            thread.send(
                UserMessageId::new(),
                [
                    "Call the delay tool twice in the same message.",
                    "Once with 100ms. Once with 300ms.",
                    "When both timers are complete, describe the outputs.",
                ],
                cx,
            )
        })
        .unwrap()
        .collect()
        .await;

    // A single EndTurn stop means both tool calls resolved without error.
    let stop_reasons = stop_events(events);
    assert_eq!(stop_reasons, vec![acp::StopReason::EndTurn]);

    thread.update(cx, |thread, _cx| {
        let last_message = thread.last_message().unwrap();
        let agent_message = last_message.as_agent_message().unwrap();
        // Concatenate only the text fragments of the agent message, ignoring
        // tool-use entries.
        let text = agent_message
            .content
            .iter()
            .filter_map(|content| {
                if let AgentMessageContent::Text(text) = content {
                    Some(text.as_str())
                } else {
                    None
                }
            })
            .collect::<String>();

        // "Ding" is what DelayTool outputs when its timer completes.
        assert!(text.contains("Ding"));
    });
}
1177
/// Verifies that switching agent profiles changes which tools are offered to
/// the model: profile "test-1" exposes echo + delay, profile "test-2" exposes
/// only the infinite tool.
#[gpui::test]
async fn test_profiles(cx: &mut TestAppContext) {
    let ThreadTest {
        model, thread, fs, ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Register all three tools on the thread; the active profile decides which
    // subset actually reaches the model.
    thread.update(cx, |thread, _cx| {
        thread.add_tool(DelayTool);
        thread.add_tool(EchoTool);
        thread.add_tool(InfiniteTool);
    });

    // Override profiles and wait for settings to be loaded.
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "profiles": {
                    "test-1": {
                        "name": "Test Profile 1",
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                        }
                    },
                    "test-2": {
                        "name": "Test Profile 2",
                        "tools": {
                            InfiniteTool::name(): true,
                        }
                    }
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;
    cx.run_until_parked();

    // Test that test-1 profile (default) has echo and delay tools
    thread
        .update(cx, |thread, cx| {
            thread.set_profile(AgentProfileId("test-1".into()), cx);
            thread.send(UserMessageId::new(), ["test"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let mut pending_completions = fake_model.pending_completions();
    assert_eq!(pending_completions.len(), 1);
    let completion = pending_completions.pop().unwrap();
    let tool_names: Vec<String> = completion
        .tools
        .iter()
        .map(|tool| tool.name.clone())
        .collect();
    assert_eq!(tool_names, vec![DelayTool::name(), EchoTool::name()]);
    fake_model.end_last_completion_stream();

    // Switch to test-2 profile, and verify that it has only the infinite tool.
    thread
        .update(cx, |thread, cx| {
            thread.set_profile(AgentProfileId("test-2".into()), cx);
            thread.send(UserMessageId::new(), ["test2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let mut pending_completions = fake_model.pending_completions();
    assert_eq!(pending_completions.len(), 1);
    let completion = pending_completions.pop().unwrap();
    let tool_names: Vec<String> = completion
        .tools
        .iter()
        .map(|tool| tool.name.clone())
        .collect();
    assert_eq!(tool_names, vec![InfiniteTool::name()]);
}
1257
/// Verifies that MCP (context server) tools are exposed to the model, that
/// tool calls round-trip through the context server, and that a native tool
/// with the same name causes the MCP tool to be renamed with a server prefix
/// ("test_server_echo") while tool results keep the correct names.
#[gpui::test]
async fn test_mcp_tools(cx: &mut TestAppContext) {
    let ThreadTest {
        model,
        thread,
        context_server_store,
        fs,
        ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Override profiles and wait for settings to be loaded.
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "always_allow_tool_actions": true,
                "profiles": {
                    "test": {
                        "name": "Test Profile",
                        "enable_all_context_servers": true,
                        "tools": {
                            EchoTool::name(): true,
                        }
                    },
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;
    cx.run_until_parked();
    thread.update(cx, |thread, cx| {
        thread.set_profile(AgentProfileId("test".into()), cx)
    });

    // Register a context server exposing a single "echo" tool that reuses
    // EchoTool's input schema.
    let mut mcp_tool_calls = setup_context_server(
        "test_server",
        vec![context_server::types::Tool {
            name: "echo".into(),
            description: None,
            input_schema: serde_json::to_value(EchoTool::input_schema(
                LanguageModelToolSchemaFormat::JsonSchema,
            ))
            .unwrap(),
            output_schema: None,
            annotations: None,
        }],
        &context_server_store,
        cx,
    );

    let events = thread.update(cx, |thread, cx| {
        thread.send(UserMessageId::new(), ["Hey"], cx).unwrap()
    });
    cx.run_until_parked();

    // Simulate the model calling the MCP tool.
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_1".into(),
            name: "echo".into(),
            raw_input: json!({"text": "test"}).to_string(),
            input: json!({"text": "test"}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // The context server should receive the call with the original arguments;
    // reply so the thread can continue its turn.
    let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
    assert_eq!(tool_call_params.name, "echo");
    assert_eq!(tool_call_params.arguments, Some(json!({"text": "test"})));
    tool_call_response
        .send(context_server::types::CallToolResponse {
            content: vec![context_server::types::ToolResponseContent::Text {
                text: "test".into(),
            }],
            is_error: None,
            meta: None,
            structured_content: None,
        })
        .unwrap();
    cx.run_until_parked();

    // NOTE(review): this re-asserts on the `completion` captured above rather
    // than a newly popped pending completion — presumably intended to fetch
    // the follow-up completion; verify.
    assert_eq!(tool_names_for_completion(&completion), vec!["echo"]);
    fake_model.send_last_completion_stream_text_chunk("Done!");
    fake_model.end_last_completion_stream();
    events.collect::<Vec<_>>().await;

    // Send again after adding the echo tool, ensuring the name collision is resolved.
    let events = thread.update(cx, |thread, cx| {
        thread.add_tool(EchoTool);
        thread.send(UserMessageId::new(), ["Go"], cx).unwrap()
    });
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    // The MCP tool is renamed with its server prefix; the native tool keeps
    // the bare name.
    assert_eq!(
        tool_names_for_completion(&completion),
        vec!["echo", "test_server_echo"]
    );
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_2".into(),
            name: "test_server_echo".into(),
            raw_input: json!({"text": "mcp"}).to_string(),
            input: json!({"text": "mcp"}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "tool_3".into(),
            name: "echo".into(),
            raw_input: json!({"text": "native"}).to_string(),
            input: json!({"text": "native"}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // The MCP call arrives at the server under its original (unprefixed) name.
    let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
    assert_eq!(tool_call_params.name, "echo");
    assert_eq!(tool_call_params.arguments, Some(json!({"text": "mcp"})));
    tool_call_response
        .send(context_server::types::CallToolResponse {
            content: vec![context_server::types::ToolResponseContent::Text { text: "mcp".into() }],
            is_error: None,
            meta: None,
            structured_content: None,
        })
        .unwrap();
    cx.run_until_parked();

    // Ensure the tool results were inserted with the correct names.
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages.last().unwrap().content,
        vec![
            MessageContent::ToolResult(LanguageModelToolResult {
                tool_use_id: "tool_3".into(),
                tool_name: "echo".into(),
                is_error: false,
                content: "native".into(),
                output: Some("native".into()),
            },),
            MessageContent::ToolResult(LanguageModelToolResult {
                tool_use_id: "tool_2".into(),
                tool_name: "test_server_echo".into(),
                is_error: false,
                content: "mcp".into(),
                output: Some("mcp".into()),
            },),
        ]
    );
    fake_model.end_last_completion_stream();
    events.collect::<Vec<_>>().await;
}
1423
/// Verifies how MCP tool names are disambiguated and truncated: conflicting
/// names get a server prefix, and names at/over MAX_TOOL_NAME_LENGTH are
/// prefixed with a truncated server identifier so the final name still fits.
#[gpui::test]
async fn test_mcp_tool_truncation(cx: &mut TestAppContext) {
    let ThreadTest {
        model,
        thread,
        context_server_store,
        fs,
        ..
    } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Set up a profile with all tools enabled
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "profiles": {
                    "test": {
                        "name": "Test Profile",
                        "enable_all_context_servers": true,
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                            WordListTool::name(): true,
                            ToolRequiringPermission::name(): true,
                            InfiniteTool::name(): true,
                        }
                    },
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;
    cx.run_until_parked();

    thread.update(cx, |thread, cx| {
        thread.set_profile(AgentProfileId("test".into()), cx);
        thread.add_tool(EchoTool);
        thread.add_tool(DelayTool);
        thread.add_tool(WordListTool);
        thread.add_tool(ToolRequiringPermission);
        thread.add_tool(InfiniteTool);
    });

    // Set up multiple context servers with some overlapping tool names
    let _server1_calls = setup_context_server(
        "xxx",
        vec![
            context_server::types::Tool {
                name: "echo".into(), // Conflicts with native EchoTool
                description: None,
                input_schema: serde_json::to_value(EchoTool::input_schema(
                    LanguageModelToolSchemaFormat::JsonSchema,
                ))
                .unwrap(),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "unique_tool_1".into(),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );

    let _server2_calls = setup_context_server(
        "yyy",
        vec![
            context_server::types::Tool {
                name: "echo".into(), // Also conflicts with native EchoTool
                description: None,
                input_schema: serde_json::to_value(EchoTool::input_schema(
                    LanguageModelToolSchemaFormat::JsonSchema,
                ))
                .unwrap(),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "unique_tool_2".into(),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            // Near-limit names: these conflict with server "zzz" below, forcing
            // prefixed-and-truncated variants.
            context_server::types::Tool {
                name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );
    let _server3_calls = setup_context_server(
        "zzz",
        vec![
            context_server::types::Tool {
                name: "a".repeat(MAX_TOOL_NAME_LENGTH - 2),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            context_server::types::Tool {
                name: "b".repeat(MAX_TOOL_NAME_LENGTH - 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
            // Over-limit name: must be truncated even without a conflict.
            context_server::types::Tool {
                name: "c".repeat(MAX_TOOL_NAME_LENGTH + 1),
                description: None,
                input_schema: json!({"type": "object", "properties": {}}),
                output_schema: None,
                annotations: None,
            },
        ],
        &context_server_store,
        cx,
    );

    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Go"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    let completion = fake_model.pending_completions().pop().unwrap();
    // Expected final (sorted) tool list, after conflict resolution and
    // truncation of over-long names.
    assert_eq!(
        tool_names_for_completion(&completion),
        vec![
            "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
            "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
            "delay",
            "echo",
            "infinite",
            "tool_requiring_permission",
            "unique_tool_1",
            "unique_tool_2",
            "word_list",
            "xxx_echo",
            "y_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
            "yyy_echo",
            "z_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        ]
    );
}
1589
/// E2E test (only runs with the "e2e" feature): cancels a turn while one tool
/// (InfiniteTool) is still running, checks the event stream closes with a
/// Cancelled stop reason, and verifies a fresh message still works afterwards.
#[gpui::test]
#[cfg_attr(not(feature = "e2e"), ignore)]
async fn test_cancellation(cx: &mut TestAppContext) {
    let ThreadTest { thread, .. } = setup(cx, TestModel::Sonnet4).await;

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(InfiniteTool);
            thread.add_tool(EchoTool);
            thread.send(
                UserMessageId::new(),
                ["Call the echo tool, then call the infinite tool, then explain their output"],
                cx,
            )
        })
        .unwrap();

    // Wait until both tools are called.
    let mut expected_tools = vec!["Echo", "Infinite Tool"];
    let mut echo_id = None;
    let mut echo_completed = false;
    while let Some(event) = events.next().await {
        match event.unwrap() {
            ThreadEvent::ToolCall(tool_call) => {
                // Tool calls are expected in the order listed above.
                assert_eq!(tool_call.title, expected_tools.remove(0));
                if tool_call.title == "Echo" {
                    echo_id = Some(tool_call.tool_call_id);
                }
            }
            // Watch for the echo tool's Completed status so we cancel while
            // only the infinite tool is still running.
            ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
                acp::ToolCallUpdate {
                    tool_call_id,
                    fields:
                        acp::ToolCallUpdateFields {
                            status: Some(acp::ToolCallStatus::Completed),
                            ..
                        },
                    ..
                },
            )) if Some(&tool_call_id) == echo_id.as_ref() => {
                echo_completed = true;
            }
            _ => {}
        }

        if expected_tools.is_empty() && echo_completed {
            break;
        }
    }

    // Cancel the current send and ensure that the event stream is closed, even
    // if one of the tools is still running.
    thread.update(cx, |thread, cx| thread.cancel(cx)).await;
    let events = events.collect::<Vec<_>>().await;
    let last_event = events.last();
    assert!(
        matches!(
            last_event,
            Some(Ok(ThreadEvent::Stop(acp::StopReason::Cancelled)))
        ),
        "unexpected event {last_event:?}"
    );

    // Ensure we can still send a new message after cancellation.
    let events = thread
        .update(cx, |thread, cx| {
            thread.send(
                UserMessageId::new(),
                ["Testing: reply with 'Hello' then stop."],
                cx,
            )
        })
        .unwrap()
        .collect::<Vec<_>>()
        .await;
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();
        assert_eq!(
            agent_message.content,
            vec![AgentMessageContent::Text("Hello".to_string())]
        );
    });
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
}
1675
/// Verifies that cancelling a thread while a terminal tool is running kills
/// the terminal, produces a Cancelled stop event, and records a tool result
/// that contains the partial terminal output rather than a bare cancellation
/// message.
#[gpui::test]
async fn test_terminal_tool_cancellation_captures_output(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A terminal handle whose process never exits on its own, so the tool can
    // only finish via cancellation.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Cancel the thread while the terminal is running
    thread.update(cx, |thread, cx| thread.cancel(cx)).detach();

    // Collect remaining events, driving the executor to let cancellation complete
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on cancellation"
    );

    // Verify we got a cancellation stop event
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::Cancelled],
    );

    // Verify the tool result contains the terminal output, not just "Tool canceled by user"
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        // "partial output" comes from FakeTerminalHandle's output field
        assert!(
            result_text.contains("partial output"),
            "expected tool result to contain terminal output, got: {result_text}"
        );
        // Match the actual format from process_content in terminal_tool.rs
        assert!(
            result_text.contains("The user stopped this command"),
            "expected tool result to indicate user stopped, got: {result_text}"
        );
    });

    // Verify we can send a new message after cancellation
    verify_thread_recovery(&thread, &fake_model, cx).await;
}
1772
/// Helper to verify thread can recover after cancellation by sending a simple message.
///
/// Sends a new user message, streams "Hello" plus an EndTurn from the fake
/// model, then asserts the final agent message and stop reason match.
async fn verify_thread_recovery(
    thread: &Entity<Thread>,
    fake_model: &FakeLanguageModel,
    cx: &mut TestAppContext,
) {
    let events = thread
        .update(cx, |thread, cx| {
            thread.send(
                UserMessageId::new(),
                ["Testing: reply with 'Hello' then stop."],
                cx,
            )
        })
        .unwrap();
    cx.run_until_parked();
    // Drive the fake model through a minimal successful turn.
    fake_model.send_last_completion_stream_text_chunk("Hello");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    let events = events.collect::<Vec<_>>().await;
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();
        assert_eq!(
            agent_message.content,
            vec![AgentMessageContent::Text("Hello".to_string())]
        );
    });
    assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]);
}
1805
/// Waits for a terminal tool to start by watching for a ToolCallUpdate with terminal content.
///
/// Polls the event stream without blocking (`now_or_never`), parking and
/// sleeping 10ms between rounds; panics if no terminal content update arrives
/// within the iteration budget.
async fn wait_for_terminal_tool_started(
    events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
    cx: &mut TestAppContext,
) {
    let deadline = cx.executor().num_cpus() * 100; // Scale with available parallelism
    for _ in 0..deadline {
        cx.run_until_parked();

        // Drain every event that is already available without awaiting.
        while let Some(Some(event)) = events.next().now_or_never() {
            if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
                update,
            ))) = &event
            {
                // A Terminal content item signals the tool has actually
                // started running its command.
                if update.fields.content.as_ref().is_some_and(|content| {
                    content
                        .iter()
                        .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
                }) {
                    return;
                }
            }
        }

        cx.background_executor
            .timer(Duration::from_millis(10))
            .await;
    }
    panic!("terminal tool did not start within the expected time");
}
1836
/// Collects events until a Stop event is received, driving the executor to completion.
///
/// Advances the fake clock by 10ms per round so timer-based work (timeouts,
/// cancellation cleanup) can make progress; returns all events collected up to
/// and including the first `ThreadEvent::Stop`. Panics if the iteration budget
/// is exhausted first.
async fn collect_events_until_stop(
    events: &mut mpsc::UnboundedReceiver<Result<ThreadEvent>>,
    cx: &mut TestAppContext,
) -> Vec<Result<ThreadEvent>> {
    let mut collected = Vec::new();
    let deadline = cx.executor().num_cpus() * 200;

    for _ in 0..deadline {
        cx.executor().advance_clock(Duration::from_millis(10));
        cx.run_until_parked();

        // Drain whatever is already available without awaiting.
        while let Some(Some(event)) = events.next().now_or_never() {
            let is_stop = matches!(&event, Ok(ThreadEvent::Stop(_)));
            collected.push(event);
            if is_stop {
                return collected;
            }
        }
    }
    panic!(
        "did not receive Stop event within the expected time; collected {} events",
        collected.len()
    );
}
1862
/// Verifies that truncating the thread at a message whose turn has a running
/// terminal tool kills the terminal, empties the thread, and leaves it usable
/// for a subsequent message.
#[gpui::test]
async fn test_truncate_while_terminal_tool_running(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // Terminal whose process never exits on its own.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    // Keep the message id so we can truncate at this exact message later.
    let message_id = UserMessageId::new();
    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(message_id.clone(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Truncate the thread while the terminal is running
    thread
        .update(cx, |thread, cx| thread.truncate(message_id, cx))
        .unwrap();

    // Drive the executor to let cancellation complete
    let _ = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on truncate"
    );

    // Verify the thread is empty after truncation
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            "",
            "expected thread to be empty after truncating the only message"
        );
    });

    // Verify we can send a new message after truncation
    verify_thread_recovery(&thread, &fake_model, cx).await;
}
1929
/// Tests that cancellation properly kills all running terminal tools when
/// multiple are active: two terminal tool calls are started in one turn,
/// the thread is cancelled, and both handles must be killed.
#[gpui::test]
async fn test_cancel_multiple_concurrent_terminal_tools(cx: &mut TestAppContext) {
    // Tests that cancellation properly kills all running terminal tools when multiple are active.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // Environment that records every terminal handle it creates.
    let environment = Rc::new(MultiTerminalEnvironment::new());

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment.clone(),
            ));
            thread.send(UserMessageId::new(), ["run multiple commands"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling two terminal tools
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_2".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 2000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 2000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for both terminal tools to start by counting terminal content updates
    let mut terminals_started = 0;
    let deadline = cx.executor().num_cpus() * 100;
    for _ in 0..deadline {
        cx.run_until_parked();

        // Drain available events without awaiting; each Terminal content
        // update marks one tool as started.
        while let Some(Some(event)) = events.next().now_or_never() {
            if let Ok(ThreadEvent::ToolCallUpdate(acp_thread::ToolCallUpdate::UpdateFields(
                update,
            ))) = &event
            {
                if update.fields.content.as_ref().is_some_and(|content| {
                    content
                        .iter()
                        .any(|c| matches!(c, acp::ToolCallContent::Terminal(_)))
                }) {
                    terminals_started += 1;
                    if terminals_started >= 2 {
                        break;
                    }
                }
            }
        }
        if terminals_started >= 2 {
            break;
        }

        cx.background_executor
            .timer(Duration::from_millis(10))
            .await;
    }
    assert!(
        terminals_started >= 2,
        "expected 2 terminal tools to start, got {terminals_started}"
    );

    // Cancel the thread while both terminals are running
    thread.update(cx, |thread, cx| thread.cancel(cx)).detach();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify both terminal handles were killed
    let handles = environment.handles();
    assert_eq!(
        handles.len(),
        2,
        "expected 2 terminal handles to be created"
    );
    assert!(
        handles[0].was_killed(),
        "expected first terminal handle to be killed on cancellation"
    );
    assert!(
        handles[1].was_killed(),
        "expected second terminal handle to be killed on cancellation"
    );

    // Verify we got a cancellation stop event
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::Cancelled],
    );
}
2038
/// Tests the per-terminal stop button path: the user stops only the terminal
/// (not the whole thread), so the turn ends with EndTurn and the tool result
/// reports that the user stopped the command.
#[gpui::test]
async fn test_terminal_tool_stopped_via_terminal_card_button(cx: &mut TestAppContext) {
    // Tests that clicking the stop button on the terminal card (as opposed to the main
    // cancel button) properly reports user stopped via the was_stopped_by_user path.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // Terminal whose process never exits unless explicitly signalled.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": "."}"#.into(),
            input: json!({"command": "sleep 1000", "cd": "."}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Simulate user clicking stop on the terminal card itself.
    // This sets the flag and signals exit (simulating what the real UI would do).
    handle.set_stopped_by_user(true);
    handle.killed.store(true, Ordering::SeqCst);
    handle.signal_exit();

    // Wait for the tool to complete
    cx.run_until_parked();

    // The thread continues after tool completion - simulate the model ending its turn
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify we got an EndTurn (not Cancelled, since we didn't cancel the thread)
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::EndTurn],
    );

    // Verify the tool result indicates user stopped
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        assert!(
            result_text.contains("The user stopped this command"),
            "expected tool result to indicate user stopped, got: {result_text}"
        );
    });
}
2133
/// When a terminal tool call carries a `timeout_ms` and the command outlives
/// it, the terminal is killed and the tool result reports a timeout rather
/// than a user-initiated stop.
#[gpui::test]
async fn test_terminal_tool_timeout_expires(cx: &mut TestAppContext) {
    // Tests that when a timeout is configured and expires, the tool result indicates timeout.
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    always_allow_tools(cx);
    let fake_model = model.as_fake();

    // A handle whose command never exits on its own, so only the timeout
    // can terminate it.
    let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx)));
    let environment = Rc::new(FakeThreadEnvironment {
        handle: handle.clone(),
    });

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(crate::TerminalTool::new(
                thread.project().clone(),
                environment,
            ));
            thread.send(UserMessageId::new(), ["run a command with timeout"], cx)
        })
        .unwrap();

    cx.run_until_parked();

    // Simulate the model calling the terminal tool with a short timeout
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "terminal_tool_1".into(),
            name: "terminal".into(),
            raw_input: r#"{"command": "sleep 1000", "cd": ".", "timeout_ms": 100}"#.into(),
            input: json!({"command": "sleep 1000", "cd": ".", "timeout_ms": 100}),
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();

    // Wait for the terminal tool to start running
    wait_for_terminal_tool_started(&mut events, cx).await;

    // Advance clock past the timeout
    cx.executor().advance_clock(Duration::from_millis(200));
    cx.run_until_parked();

    // The thread continues after tool completion - simulate the model ending its turn
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    // Collect remaining events
    let remaining_events = collect_events_until_stop(&mut events, cx).await;

    // Verify the terminal was killed due to timeout
    assert!(
        handle.was_killed(),
        "expected terminal handle to be killed on timeout"
    );

    // Verify we got an EndTurn (the tool completed, just with timeout)
    assert_eq!(
        stop_events(remaining_events),
        vec![acp::StopReason::EndTurn],
    );

    // Verify the tool result indicates timeout, not user stopped
    thread.update(cx, |thread, _cx| {
        let message = thread.last_message().unwrap();
        let agent_message = message.as_agent_message().unwrap();

        let tool_use = agent_message
            .content
            .iter()
            .find_map(|content| match content {
                AgentMessageContent::ToolUse(tool_use) => Some(tool_use),
                _ => None,
            })
            .expect("expected tool use in agent message");

        let tool_result = agent_message
            .tool_results
            .get(&tool_use.id)
            .expect("expected tool result");

        let result_text = match &tool_result.content {
            language_model::LanguageModelToolResultContent::Text(text) => text.to_string(),
            _ => panic!("expected text content in tool result"),
        };

        assert!(
            result_text.contains("timed out"),
            "expected tool result to indicate timeout, got: {result_text}"
        );
        assert!(
            !result_text.contains("The user stopped"),
            "tool result should not mention user stopped when it timed out, got: {result_text}"
        );
    });
}
2232
/// Sending a new message while a previous send is still streaming cancels the
/// in-progress turn: the first event stream ends with `Cancelled` while the
/// second runs to `EndTurn`.
#[gpui::test]
async fn test_in_progress_send_canceled_by_next_send(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events_1 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 1!");
    cx.run_until_parked();

    // Start a second send before the first completion stream has finished.
    let events_2 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 2!");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();

    // The first turn was preempted; the second completed normally.
    let events_1 = events_1.collect::<Vec<_>>().await;
    assert_eq!(stop_events(events_1), vec![acp::StopReason::Cancelled]);
    let events_2 = events_2.collect::<Vec<_>>().await;
    assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
}
2263
/// A send that is allowed to finish before the next one begins is not
/// canceled: both turns end with `EndTurn`.
#[gpui::test]
async fn test_subsequent_successful_sends_dont_cancel(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First send runs to completion before the second one starts.
    let events_1 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 1!");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();
    let events_1 = events_1.collect::<Vec<_>>().await;

    let events_2 = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey 2!");
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::EndTurn));
    fake_model.end_last_completion_stream();
    let events_2 = events_2.collect::<Vec<_>>().await;

    assert_eq!(stop_events(events_1), vec![acp::StopReason::EndTurn]);
    assert_eq!(stop_events(events_2), vec![acp::StopReason::EndTurn]);
}
2296
/// A `Refusal` stop reason surfaces as `acp::StopReason::Refusal` and rolls
/// the thread back by removing everything after (and including) the last user
/// message — the markdown ends up empty.
#[gpui::test]
async fn test_refusal(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello
            "}
        );
    });

    fake_model.send_last_completion_stream_text_chunk("Hey!");
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello

                ## Assistant

                Hey!
            "}
        );
    });

    // If the model refuses to continue, the thread should remove all the messages after the last user message.
    fake_model
        .send_last_completion_stream_event(LanguageModelCompletionEvent::Stop(StopReason::Refusal));
    let events = events.collect::<Vec<_>>().await;
    assert_eq!(stop_events(events), vec![acp::StopReason::Refusal]);
    thread.read_with(cx, |thread, _| {
        assert_eq!(thread.to_markdown(), "");
    });
}
2345
/// Truncating at the first (and only) user message clears the whole thread
/// and resets token usage; the thread remains usable for a fresh send
/// afterwards.
#[gpui::test]
async fn test_truncate_first_message(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let message_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_id.clone(), ["Hello"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello
            "}
        );
        assert_eq!(thread.latest_token_usage(), None);
    });

    fake_model.send_last_completion_stream_text_chunk("Hey!");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 32_000,
            output_tokens: 16_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello

                ## Assistant

                Hey!
            "}
        );
        assert_eq!(
            thread.latest_token_usage(),
            Some(acp_thread::TokenUsage {
                used_tokens: 32_000 + 16_000,
                max_tokens: 1_000_000,
            })
        );
    });

    // Truncating at the first message erases the conversation and usage.
    thread
        .update(cx, |thread, cx| thread.truncate(message_id, cx))
        .unwrap();
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(thread.to_markdown(), "");
        assert_eq!(thread.latest_token_usage(), None);
    });

    // Ensure we can still send a new message after truncation.
    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hi"], cx)
        })
        .unwrap();
    thread.update(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hi
            "}
        );
    });
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Ahoy!");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 40_000,
            output_tokens: 20_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    cx.run_until_parked();
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hi

                ## Assistant

                Ahoy!
            "}
        );

        assert_eq!(
            thread.latest_token_usage(),
            Some(acp_thread::TokenUsage {
                used_tokens: 40_000 + 20_000,
                max_tokens: 1_000_000,
            })
        );
    });
}
2461
/// Truncating at the second user message removes only that exchange, restoring
/// the thread (markdown and token usage) to its state after the first
/// exchange.
#[gpui::test]
async fn test_truncate_second_message(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Message 1 response");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 32_000,
            output_tokens: 16_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Snapshot assertion reused before and after truncation to prove the
    // thread returned to exactly this state.
    let assert_first_message_state = |cx: &mut TestAppContext| {
        thread.clone().read_with(cx, |thread, _| {
            assert_eq!(
                thread.to_markdown(),
                indoc! {"
                    ## User

                    Message 1

                    ## Assistant

                    Message 1 response
                "}
            );

            assert_eq!(
                thread.latest_token_usage(),
                Some(acp_thread::TokenUsage {
                    used_tokens: 32_000 + 16_000,
                    max_tokens: 1_000_000,
                })
            );
        });
    };

    assert_first_message_state(cx);

    let second_message_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(second_message_id.clone(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Message 2 response");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 40_000,
            output_tokens: 20_000,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Message 1

                ## Assistant

                Message 1 response

                ## User

                Message 2

                ## Assistant

                Message 2 response
            "}
        );

        assert_eq!(
            thread.latest_token_usage(),
            Some(acp_thread::TokenUsage {
                used_tokens: 40_000 + 20_000,
                max_tokens: 1_000_000,
            })
        );
    });

    thread
        .update(cx, |thread, cx| thread.truncate(second_message_id, cx))
        .unwrap();
    cx.run_until_parked();

    assert_first_message_state(cx);
}
2570
/// The summarization model generates a title from the first exchange (using
/// only the first line of its output); subsequent sends do not regenerate
/// the title.
#[gpui::test]
async fn test_title_generation(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Separate fake model dedicated to title summarization.
    let summary_model = Arc::new(FakeLanguageModel::default());
    thread.update(cx, |thread, cx| {
        thread.set_summarization_model(Some(summary_model.clone()), cx)
    });

    let send = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Hey!");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    // Title is still the default until the summary model responds.
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "New Thread"));

    // Ensure the summary model has been invoked to generate a title.
    summary_model.send_last_completion_stream_text_chunk("Hello ");
    summary_model.send_last_completion_stream_text_chunk("world\nG");
    summary_model.send_last_completion_stream_text_chunk("oodnight Moon");
    summary_model.end_last_completion_stream();
    send.collect::<Vec<_>>().await;
    cx.run_until_parked();
    // Only the first line ("Hello world") becomes the title.
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));

    // Send another message, ensuring no title is generated this time.
    let send = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Hello again"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Hey again!");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    assert_eq!(summary_model.pending_completions(), Vec::new());
    send.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _| assert_eq!(thread.title(), "Hello world"));
}
2616
/// Tool uses that are still awaiting permission are excluded when building a
/// completion request; completed tool uses (and their results) are included.
#[gpui::test]
async fn test_building_request_with_pending_tools(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let _events = thread
        .update(cx, |thread, cx| {
            thread.add_tool(ToolRequiringPermission);
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["Hey!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // This tool call will stay pending, waiting for user permission.
    let permission_tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: ToolRequiringPermission::name().into(),
        raw_input: "{}".into(),
        input: json!({}),
        is_input_complete: true,
        thought_signature: None,
    };
    // This tool call runs to completion immediately.
    let echo_tool_use = LanguageModelToolUse {
        id: "tool_id_2".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    fake_model.send_last_completion_stream_text_chunk("Hi!");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        permission_tool_use,
    ));
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        echo_tool_use.clone(),
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Ensure pending tools are skipped when building a request.
    let request = thread
        .read_with(cx, |thread, cx| {
            thread.build_completion_request(CompletionIntent::EditFile, cx)
        })
        .unwrap();
    assert_eq!(
        request.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Hey!".into()],
                cache: true,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![
                    MessageContent::Text("Hi!".into()),
                    MessageContent::ToolUse(echo_tool_use.clone())
                ],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![MessageContent::ToolResult(LanguageModelToolResult {
                    tool_use_id: echo_tool_use.id.clone(),
                    tool_name: echo_tool_use.name,
                    is_error: false,
                    content: "test".into(),
                    output: Some("test".into())
                })],
                cache: false,
                reasoning_details: None,
            },
        ],
    );
}
2696
/// End-to-end exercise of `NativeAgentConnection`: creating a thread, listing
/// and selecting models, prompting, canceling, and verifying the session is
/// dropped along with its `AcpThread`.
#[gpui::test]
async fn test_agent_connection(cx: &mut TestAppContext) {
    cx.update(settings::init);
    let templates = Templates::new();

    // Initialize language model system with test provider
    cx.update(|cx| {
        gpui_tokio::init(cx);

        let http_client = FakeHttpClient::with_404_response();
        let clock = Arc::new(clock::FakeSystemClock::new());
        let client = Client::new(clock, http_client, cx);
        let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
        language_model::init(client.clone(), cx);
        language_models::init(user_store, client.clone(), cx);
        LanguageModelRegistry::test(cx);
    });
    cx.executor().forbid_parking();

    // Create a project for new_thread
    let fake_fs = cx.update(|cx| fs::FakeFs::new(cx.background_executor().clone()));
    fake_fs.insert_tree(path!("/test"), json!({})).await;
    let project = Project::test(fake_fs.clone(), [Path::new("/test")], cx).await;
    let cwd = Path::new("/test");
    let text_thread_store =
        cx.new(|cx| assistant_text_thread::TextThreadStore::fake(project.clone(), cx));
    let history_store = cx.new(|cx| HistoryStore::new(text_thread_store, cx));

    // Create agent and connection
    let agent = NativeAgent::new(
        project.clone(),
        history_store,
        templates.clone(),
        None,
        fake_fs.clone(),
        &mut cx.to_async(),
    )
    .await
    .unwrap();
    let connection = NativeAgentConnection(agent.clone());

    // Create a thread using new_thread
    let connection_rc = Rc::new(connection.clone());
    let acp_thread = cx
        .update(|cx| connection_rc.new_thread(project, cwd, cx))
        .await
        .expect("new_thread should succeed");

    // Get the session_id from the AcpThread
    let session_id = acp_thread.read_with(cx, |thread, _| thread.session_id().clone());

    // Test model_selector returns Some
    let selector_opt = connection.model_selector(&session_id);
    assert!(
        selector_opt.is_some(),
        "agent should always support ModelSelector"
    );
    let selector = selector_opt.unwrap();

    // Test list_models
    let listed_models = cx
        .update(|cx| selector.list_models(cx))
        .await
        .expect("list_models should succeed");
    let AgentModelList::Grouped(listed_models) = listed_models else {
        panic!("Unexpected model list type");
    };
    assert!(!listed_models.is_empty(), "should have at least one model");
    assert_eq!(
        listed_models[&AgentModelGroupName("Fake".into())][0]
            .id
            .0
            .as_ref(),
        "fake/fake"
    );

    // Test selected_model returns the default
    let model = cx
        .update(|cx| selector.selected_model(cx))
        .await
        .expect("selected_model should succeed");
    let model = cx
        .update(|cx| agent.read(cx).models().model_from_id(&model.id))
        .unwrap();
    let model = model.as_fake();
    assert_eq!(model.id().0, "fake", "should return default model");

    let request = acp_thread.update(cx, |thread, cx| thread.send(vec!["abc".into()], cx));
    cx.run_until_parked();
    model.send_last_completion_stream_text_chunk("def");
    cx.run_until_parked();
    acp_thread.read_with(cx, |thread, cx| {
        assert_eq!(
            thread.to_markdown(cx),
            indoc! {"
                ## User

                abc

                ## Assistant

                def

            "}
        )
    });

    // Test cancel
    cx.update(|cx| connection.cancel(&session_id, cx));
    request.await.expect("prompt should fail gracefully");

    // Ensure that dropping the ACP thread causes the native thread to be
    // dropped as well.
    cx.update(|_| drop(acp_thread));
    let result = cx
        .update(|cx| {
            connection.prompt(
                Some(acp_thread::UserMessageId::new()),
                acp::PromptRequest::new(session_id.clone(), vec!["ghi".into()]),
                cx,
            )
        })
        .await;
    assert_eq!(
        result.as_ref().unwrap_err().to_string(),
        "Session not found",
        "unexpected result: {:?}",
        result
    );
}
2827
/// Streaming partial tool input emits an initial `ToolCall` followed by field
/// updates (raw input, in-progress status, content, then completion with raw
/// output).
#[gpui::test]
async fn test_tool_updates_to_completion(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    thread.update(cx, |thread, _cx| thread.add_tool(ThinkingTool));
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.send(UserMessageId::new(), ["Think"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Simulate streaming partial input.
    let input = json!({});
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "1".into(),
            name: ThinkingTool::name().into(),
            raw_input: input.to_string(),
            input,
            is_input_complete: false,
            thought_signature: None,
        },
    ));

    // Input streaming completed
    let input = json!({ "content": "Thinking hard!" });
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        LanguageModelToolUse {
            id: "1".into(),
            name: "thinking".into(),
            raw_input: input.to_string(),
            input,
            is_input_complete: true,
            thought_signature: None,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // The initial tool call reflects the partial (empty) input.
    let tool_call = expect_tool_call(&mut events).await;
    assert_eq!(
        tool_call,
        acp::ToolCall::new("1", "Thinking")
            .kind(acp::ToolKind::Think)
            .raw_input(json!({}))
            .meta(acp::Meta::from_iter([(
                "tool_name".into(),
                "thinking".into()
            )]))
    );
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new()
                .title("Thinking")
                .kind(acp::ToolKind::Think)
                .raw_input(json!({ "content": "Thinking hard!"}))
        )
    );
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new().status(acp::ToolCallStatus::InProgress)
        )
    );
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new().content(vec!["Thinking hard!".into()])
        )
    );
    let update = expect_tool_call_update_fields(&mut events).await;
    assert_eq!(
        update,
        acp::ToolCallUpdate::new(
            "1",
            acp::ToolCallUpdateFields::new()
                .status(acp::ToolCallStatus::Completed)
                .raw_output("Finished thinking.")
        )
    );
}
2918
/// A completion that succeeds on the first attempt (even in Burn mode, where
/// retries are enabled) must emit no `Retry` events.
#[gpui::test]
async fn test_send_no_retry_on_success(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.send(UserMessageId::new(), ["Hello!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Hey!");
    fake_model.end_last_completion_stream();

    // Drain events until the turn stops, recording any retries.
    let mut retry_events = Vec::new();
    while let Some(Ok(event)) = events.next().await {
        match event {
            ThreadEvent::Retry(retry_status) => {
                retry_events.push(retry_status);
            }
            ThreadEvent::Stop(..) => break,
            _ => {}
        }
    }

    assert_eq!(retry_events.len(), 0);
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello!

                ## Assistant

                Hey!
            "}
        )
    });
}
2962
/// A `ServerOverloaded` error mid-stream triggers one retry after the
/// provider-supplied `retry_after` delay, and the turn resumes with a new
/// assistant message.
#[gpui::test]
async fn test_send_retry_on_error(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.send(UserMessageId::new(), ["Hello!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Stream part of a response, then fail with a retryable overload error.
    fake_model.send_last_completion_stream_text_chunk("Hey,");
    fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
        provider: LanguageModelProviderName::new("Anthropic"),
        retry_after: Some(Duration::from_secs(3)),
    });
    fake_model.end_last_completion_stream();

    // Advance past the retry delay so the retry attempt fires.
    cx.executor().advance_clock(Duration::from_secs(3));
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("there!");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    let mut retry_events = Vec::new();
    while let Some(Ok(event)) = events.next().await {
        match event {
            ThreadEvent::Retry(retry_status) => {
                retry_events.push(retry_status);
            }
            ThreadEvent::Stop(..) => break,
            _ => {}
        }
    }

    assert_eq!(retry_events.len(), 1);
    assert!(matches!(
        retry_events[0],
        acp_thread::RetryStatus { attempt: 1, .. }
    ));
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.to_markdown(),
            indoc! {"
                ## User

                Hello!

                ## Assistant

                Hey,

                [resume]

                ## Assistant

                there!
            "}
        )
    });
}
3027
/// A tool call received before a retryable error is still executed, and its
/// result is included in the request sent on the retry attempt.
#[gpui::test]
async fn test_send_retry_finishes_tool_calls_on_error(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.add_tool(EchoTool);
            thread.send(UserMessageId::new(), ["Call the echo tool!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    let tool_use_1 = LanguageModelToolUse {
        id: "tool_1".into(),
        name: EchoTool::name().into(),
        raw_input: json!({"text": "test"}).to_string(),
        input: json!({"text": "test"}),
        is_input_complete: true,
        thought_signature: None,
    };
    // Deliver the tool use, then fail the stream with a retryable error.
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        tool_use_1.clone(),
    ));
    fake_model.send_last_completion_stream_error(LanguageModelCompletionError::ServerOverloaded {
        provider: LanguageModelProviderName::new("Anthropic"),
        retry_after: Some(Duration::from_secs(3)),
    });
    fake_model.end_last_completion_stream();

    // After the retry delay, the new request must contain the finished tool
    // call and its result.
    cx.executor().advance_clock(Duration::from_secs(3));
    let completion = fake_model.pending_completions().pop().unwrap();
    assert_eq!(
        completion.messages[1..],
        vec![
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec!["Call the echo tool!".into()],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::Assistant,
                content: vec![language_model::MessageContent::ToolUse(tool_use_1.clone())],
                cache: false,
                reasoning_details: None,
            },
            LanguageModelRequestMessage {
                role: Role::User,
                content: vec![language_model::MessageContent::ToolResult(
                    LanguageModelToolResult {
                        tool_use_id: tool_use_1.id.clone(),
                        tool_name: tool_use_1.name.clone(),
                        is_error: false,
                        content: "test".into(),
                        output: Some("test".into())
                    }
                )],
                cache: true,
                reasoning_details: None,
            },
        ]
    );

    fake_model.send_last_completion_stream_text_chunk("Done");
    fake_model.end_last_completion_stream();
    cx.run_until_parked();
    events.collect::<Vec<_>>().await;
    thread.read_with(cx, |thread, _cx| {
        assert_eq!(
            thread.last_message(),
            Some(Message::Agent(AgentMessage {
                content: vec![AgentMessageContent::Text("Done".into())],
                tool_results: IndexMap::default(),
                reasoning_details: None,
            }))
        );
    })
}
3108
/// After `MAX_RETRY_ATTEMPTS` consecutive retryable failures, the thread stops
/// retrying and surfaces the final error to the event stream.
#[gpui::test]
async fn test_send_max_retries_exceeded(cx: &mut TestAppContext) {
    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    let mut events = thread
        .update(cx, |thread, cx| {
            thread.set_completion_mode(agent_settings::CompletionMode::Burn, cx);
            thread.send(UserMessageId::new(), ["Hello!"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Fail every attempt: the initial one plus each retry.
    for _ in 0..crate::thread::MAX_RETRY_ATTEMPTS + 1 {
        fake_model.send_last_completion_stream_error(
            LanguageModelCompletionError::ServerOverloaded {
                provider: LanguageModelProviderName::new("Anthropic"),
                retry_after: Some(Duration::from_secs(3)),
            },
        );
        fake_model.end_last_completion_stream();
        cx.executor().advance_clock(Duration::from_secs(3));
        cx.run_until_parked();
    }

    let mut errors = Vec::new();
    let mut retry_events = Vec::new();
    while let Some(event) = events.next().await {
        match event {
            Ok(ThreadEvent::Retry(retry_status)) => {
                retry_events.push(retry_status);
            }
            Ok(ThreadEvent::Stop(..)) => break,
            Err(error) => errors.push(error),
            _ => {}
        }
    }

    assert_eq!(
        retry_events.len(),
        crate::thread::MAX_RETRY_ATTEMPTS as usize
    );
    // Attempts are numbered starting from 1.
    for i in 0..crate::thread::MAX_RETRY_ATTEMPTS as usize {
        assert_eq!(retry_events[i].attempt, i + 1);
    }
    assert_eq!(errors.len(), 1);
    let error = errors[0]
        .downcast_ref::<LanguageModelCompletionError>()
        .unwrap();
    assert!(matches!(
        error,
        LanguageModelCompletionError::ServerOverloaded { .. }
    ));
}
3163
3164/// Filters out the stop events for asserting against in tests
3165fn stop_events(result_events: Vec<Result<ThreadEvent>>) -> Vec<acp::StopReason> {
3166 result_events
3167 .into_iter()
3168 .filter_map(|event| match event.unwrap() {
3169 ThreadEvent::Stop(stop_reason) => Some(stop_reason),
3170 _ => None,
3171 })
3172 .collect()
3173}
3174
/// Bundle of entities produced by [`setup`] that tests use to drive a thread.
struct ThreadTest {
    // Language model backing the thread (fake or real, per `TestModel`).
    model: Arc<dyn LanguageModel>,
    // The thread under test.
    thread: Entity<Thread>,
    project_context: Entity<ProjectContext>,
    context_server_store: Entity<ContextServerStore>,
    fs: Arc<FakeFs>,
}
3182
/// Which language model backs a test thread.
enum TestModel {
    // A real model resolved from the registry and authenticated via the provider.
    Sonnet4,
    // An in-process `FakeLanguageModel`; used by most tests.
    Fake,
}
3187
impl TestModel {
    /// Registry id for a real model variant.
    ///
    /// `Fake` is unreachable here because `setup` constructs a
    /// `FakeLanguageModel` directly instead of looking it up by id.
    fn id(&self) -> LanguageModelId {
        match self {
            TestModel::Sonnet4 => LanguageModelId("claude-sonnet-4-latest".into()),
            TestModel::Fake => unreachable!(),
        }
    }
}
3196
/// Builds a [`ThreadTest`] fixture: a fake filesystem whose settings file
/// enables all the test tools under a "test-profile" profile, a test project
/// rooted at `/test`, and a `Thread` driving either a fake model or a real,
/// authenticated one (per `model`).
async fn setup(cx: &mut TestAppContext, model: TestModel) -> ThreadTest {
    // Real-model runs perform blocking network I/O; allow the executor to park.
    cx.executor().allow_parking();

    let fs = FakeFs::new(cx.background_executor.clone());
    fs.create_dir(paths::settings_file().parent().unwrap())
        .await
        .unwrap();
    // Seed the settings file so the default profile exposes every test tool
    // (plus the built-in terminal tool) to the thread.
    fs.insert_file(
        paths::settings_file(),
        json!({
            "agent": {
                "default_profile": "test-profile",
                "profiles": {
                    "test-profile": {
                        "name": "Test Profile",
                        "tools": {
                            EchoTool::name(): true,
                            DelayTool::name(): true,
                            WordListTool::name(): true,
                            ToolRequiringPermission::name(): true,
                            InfiniteTool::name(): true,
                            ThinkingTool::name(): true,
                            "terminal": true,
                        }
                    }
                }
            }
        })
        .to_string()
        .into_bytes(),
    )
    .await;

    cx.update(|cx| {
        settings::init(cx);

        match model {
            TestModel::Fake => {}
            TestModel::Sonnet4 => {
                // A real model needs the full client stack: HTTP client,
                // production client/auth, and the language-model registries.
                gpui_tokio::init(cx);
                let http_client = ReqwestClient::user_agent("agent tests").unwrap();
                cx.set_http_client(Arc::new(http_client));
                let client = Client::production(cx);
                let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
                language_model::init(client.clone(), cx);
                language_models::init(user_store, client.clone(), cx);
            }
        };

        // Keep the global SettingsStore in sync with later writes to the
        // settings file (some tests mutate it mid-test).
        watch_settings(fs.clone(), cx);
    });

    let templates = Templates::new();

    fs.insert_tree(path!("/test"), json!({})).await;
    let project = Project::test(fs.clone(), [path!("/test").as_ref()], cx).await;

    // Resolve the model: fake models are ready immediately; real ones are
    // found in the registry and their provider authenticated first.
    let model = cx
        .update(|cx| {
            if let TestModel::Fake = model {
                Task::ready(Arc::new(FakeLanguageModel::default()) as Arc<_>)
            } else {
                let model_id = model.id();
                let models = LanguageModelRegistry::read_global(cx);
                let model = models
                    .available_models(cx)
                    .find(|model| model.id() == model_id)
                    .unwrap();

                let provider = models.provider(&model.provider_id()).unwrap();
                let authenticated = provider.authenticate(cx);

                cx.spawn(async move |_cx| {
                    authenticated.await.unwrap();
                    model
                })
            }
        })
        .await;

    let project_context = cx.new(|_cx| ProjectContext::default());
    let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
    let context_server_registry =
        cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
    let thread = cx.new(|cx| {
        Thread::new(
            project,
            project_context.clone(),
            context_server_registry,
            templates,
            Some(model.clone()),
            cx,
        )
    });
    ThreadTest {
        model,
        thread,
        project_context,
        context_server_store,
        fs,
    }
}
3299
#[cfg(test)]
#[ctor::ctor]
fn init_logger() {
    // Opt-in logging: only install env_logger when RUST_LOG was provided.
    let logging_requested = std::env::var("RUST_LOG").is_ok();
    if logging_requested {
        env_logger::init();
    }
}
3307
3308fn watch_settings(fs: Arc<dyn Fs>, cx: &mut App) {
3309 let fs = fs.clone();
3310 cx.spawn({
3311 async move |cx| {
3312 let mut new_settings_content_rx = settings::watch_config_file(
3313 cx.background_executor(),
3314 fs,
3315 paths::settings_file().clone(),
3316 );
3317
3318 while let Some(new_settings_content) = new_settings_content_rx.next().await {
3319 cx.update(|cx| {
3320 SettingsStore::update_global(cx, |settings, cx| {
3321 settings.set_user_settings(&new_settings_content, cx)
3322 })
3323 })
3324 .ok();
3325 }
3326 }
3327 })
3328 .detach();
3329}
3330
3331fn tool_names_for_completion(completion: &LanguageModelRequest) -> Vec<String> {
3332 completion
3333 .tools
3334 .iter()
3335 .map(|tool| tool.name.clone())
3336 .collect()
3337}
3338
/// Registers a fake stdio context server named `name` that advertises `tools`,
/// starts it in `context_server_store`, and returns a channel of incoming tool
/// calls. Each received item pairs the call's params with a oneshot sender the
/// test must use to supply the tool's response.
fn setup_context_server(
    name: &'static str,
    tools: Vec<context_server::types::Tool>,
    context_server_store: &Entity<ContextServerStore>,
    cx: &mut TestAppContext,
) -> mpsc::UnboundedReceiver<(
    context_server::types::CallToolParams,
    oneshot::Sender<context_server::types::CallToolResponse>,
)> {
    // Declare the server in project settings so the store knows about it.
    // The command is a placeholder; the fake transport below intercepts I/O.
    cx.update(|cx| {
        let mut settings = ProjectSettings::get_global(cx).clone();
        settings.context_servers.insert(
            name.into(),
            project::project_settings::ContextServerSettings::Stdio {
                enabled: true,
                command: ContextServerCommand {
                    path: "somebinary".into(),
                    args: Vec::new(),
                    env: None,
                    timeout: None,
                },
            },
        );
        ProjectSettings::override_global(settings, cx);
    });

    let (mcp_tool_calls_tx, mcp_tool_calls_rx) = mpsc::unbounded();
    // Fake transport that answers the MCP handshake (Initialize, ListTools)
    // and forwards each CallTool request to the test via the channel.
    let fake_transport = context_server::test::create_fake_transport(name, cx.executor())
        .on_request::<context_server::types::requests::Initialize, _>(move |_params| async move {
            context_server::types::InitializeResponse {
                protocol_version: context_server::types::ProtocolVersion(
                    context_server::types::LATEST_PROTOCOL_VERSION.to_string(),
                ),
                server_info: context_server::types::Implementation {
                    name: name.into(),
                    version: "1.0.0".to_string(),
                },
                capabilities: context_server::types::ServerCapabilities {
                    tools: Some(context_server::types::ToolsCapabilities {
                        list_changed: Some(true),
                    }),
                    ..Default::default()
                },
                meta: None,
            }
        })
        .on_request::<context_server::types::requests::ListTools, _>(move |_params| {
            let tools = tools.clone();
            async move {
                context_server::types::ListToolsResponse {
                    tools,
                    next_cursor: None,
                    meta: None,
                }
            }
        })
        .on_request::<context_server::types::requests::CallTool, _>(move |params| {
            let mcp_tool_calls_tx = mcp_tool_calls_tx.clone();
            async move {
                // Hand the call to the test and block this request until the
                // test replies through the oneshot sender.
                let (response_tx, response_rx) = oneshot::channel();
                mcp_tool_calls_tx
                    .unbounded_send((params, response_tx))
                    .unwrap();
                response_rx.await.unwrap()
            }
        });
    context_server_store.update(cx, |store, cx| {
        store.start_server(
            Arc::new(ContextServer::new(
                ContextServerId(name.into()),
                Arc::new(fake_transport),
            )),
            cx,
        );
    });
    // Let the server finish its handshake before the test proceeds.
    cx.run_until_parked();
    mcp_tool_calls_rx
}
3417
/// Verifies `tokens_before_message`: each user message reports the input-token
/// count from the request that preceded it (None for the first message), and
/// earlier messages keep their values as the conversation grows.
#[gpui::test]
async fn test_tokens_before_message(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First message
    let message_1_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_1_id.clone(), ["First message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Before any response, tokens_before_message should return None for first message
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should have no tokens before it"
        );
    });

    // Complete first message with usage
    fake_model.send_last_completion_stream_text_chunk("Response 1");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // First message still has no tokens before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should still have no tokens before it after response"
        );
    });

    // Second message
    let message_2_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_2_id.clone(), ["Second message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Second message should have first message's input tokens before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            Some(100),
            "Second message should have 100 tokens before it (from first request)"
        );
    });

    // Complete second message
    fake_model.send_last_completion_stream_text_chunk("Response 2");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 250, // Total for this request (includes previous context)
            output_tokens: 75,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Third message
    let message_3_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_3_id.clone(), ["Third message"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    // Third message should have second message's input tokens (250) before it
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_3_id),
            Some(250),
            "Third message should have 250 tokens before it (from second request)"
        );
        // Second message should still have 100
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            Some(100),
            "Second message should still have 100 tokens before it"
        );
        // First message still has none
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message should still have no tokens before it"
        );
    });
}
3524
/// Verifies that after truncating the thread at a message, that message's
/// token lookup returns None (it no longer exists), while messages before the
/// truncation point are unaffected.
#[gpui::test]
async fn test_tokens_before_message_after_truncate(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Set up three messages with responses
    let message_1_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_1_id.clone(), ["Message 1"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Response 1");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    let message_2_id = UserMessageId::new();
    thread
        .update(cx, |thread, cx| {
            thread.send(message_2_id.clone(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();
    fake_model.send_last_completion_stream_text_chunk("Response 2");
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        language_model::TokenUsage {
            input_tokens: 250,
            output_tokens: 75,
            cache_creation_input_tokens: 0,
            cache_read_input_tokens: 0,
        },
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // Verify initial state
    thread.read_with(cx, |thread, _| {
        assert_eq!(thread.tokens_before_message(&message_2_id), Some(100));
    });

    // Truncate at message 2 (removes message 2 and everything after)
    thread
        .update(cx, |thread, cx| thread.truncate(message_2_id.clone(), cx))
        .unwrap();
    cx.run_until_parked();

    // After truncation, message_2_id no longer exists, so lookup should return None
    thread.read_with(cx, |thread, _| {
        assert_eq!(
            thread.tokens_before_message(&message_2_id),
            None,
            "After truncation, message 2 no longer exists"
        );
        // Message 1 still exists but has no tokens before it
        assert_eq!(
            thread.tokens_before_message(&message_1_id),
            None,
            "First message still has no tokens before it"
        );
    });
}