1use crate::tools::streaming_edit_file_tool::*;
2use crate::{
3 AgentTool, ContextServerRegistry, EditFileTool, GrepTool, GrepToolInput, ListDirectoryTool,
4 ListDirectoryToolInput, ReadFileTool, ReadFileToolInput, StreamingEditFileTool, Template,
5 Templates, Thread, ToolCallEventStream, ToolInput,
6};
7use Role::*;
8use anyhow::{Context as _, Result};
9use client::{Client, UserStore};
10use fs::FakeFs;
11use futures::{FutureExt, StreamExt, future::LocalBoxFuture};
12use gpui::{AppContext as _, AsyncApp, Entity, TestAppContext, UpdateGlobal as _};
13use http_client::StatusCode;
14use language::language_settings::FormatOnSave;
15use language_model::{
16 LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
17 LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage,
18 LanguageModelRequestTool, LanguageModelToolResult, LanguageModelToolResultContent,
19 LanguageModelToolSchemaFormat, LanguageModelToolUse, LanguageModelToolUseId, MessageContent,
20 Role, SelectedModel,
21};
22use project::Project;
23use prompt_store::{ProjectContext, WorktreeContext};
24use rand::prelude::*;
25use reqwest_client::ReqwestClient;
26use serde::Serialize;
27use serde_json::json;
28use settings::SettingsStore;
29use std::{
30 fmt::{self, Display},
31 path::{Path, PathBuf},
32 str::FromStr,
33 sync::Arc,
34 time::Duration,
35};
36use util::path;
37
/// Variables handed to the `diff_judge.hbs` Handlebars template, which asks a
/// judge model to score a diff against natural-language assertions
/// (see `EvalAssertion::judge_diff`).
#[derive(Serialize)]
struct DiffJudgeTemplate {
    // Unified diff produced by the edit under evaluation.
    diff: String,
    // Assertions the judge should check the diff against; `'static` because
    // they are always string literals written in the test functions below.
    assertions: &'static str,
}
43
impl Template for DiffJudgeTemplate {
    // Template file rendered through `Templates::new()` in `judge_diff`.
    const TEMPLATE_NAME: &'static str = "diff_judge.hbs";
}
47
/// One evaluation case: a pre-recorded conversation, the file the model is
/// asked to edit, and the assertion used to grade the result.
#[derive(Clone)]
struct EvalInput {
    // Messages replayed to the model (tool calls/results included). The last
    // message gets `cache = true` in `StreamingEditToolTest::eval`.
    conversation: Vec<LanguageModelRequestMessage>,
    // Worktree-relative path (e.g. "root/blame.rs"); mapped under `/root` on
    // the FakeFs before the run.
    input_file_path: PathBuf,
    // Initial file contents, if the file should exist before the edit.
    input_content: Option<String>,
    // How to score the model's output.
    assertion: EvalAssertion,
}
55
56impl EvalInput {
57 fn new(
58 conversation: Vec<LanguageModelRequestMessage>,
59 input_file_path: impl Into<PathBuf>,
60 input_content: Option<String>,
61 assertion: EvalAssertion,
62 ) -> Self {
63 EvalInput {
64 conversation,
65 input_file_path: input_file_path.into(),
66 input_content,
67 assertion,
68 }
69 }
70}
71
/// Outcome of a single tool run, handed to assertions for grading.
#[derive(Clone)]
struct EvalSample {
    // File contents before the edit (empty string when the file was new).
    text_before: String,
    // File contents after the tool applied its edit.
    text_after: String,
    // The parsed input the model supplied to the streaming edit tool.
    tool_input: StreamingEditFileToolInput,
    // Unified diff between `text_before` and `text_after`.
    diff: String,
}
79
/// Object-safe assertion over an [`EvalSample`]. Implemented blanket-style
/// below for any matching `AsyncFn`, so plain async closures can be stored
/// behind `Arc<dyn AssertionFn>` inside [`EvalAssertion`].
trait AssertionFn: 'static + Send + Sync {
    fn assert<'a>(
        &'a self,
        sample: &'a EvalSample,
        judge_model: Arc<dyn LanguageModel>,
        cx: &'a mut TestAppContext,
    ) -> LocalBoxFuture<'a, Result<EvalAssertionOutcome>>;
}
88
/// Blanket impl: any async closure with the right signature is an
/// [`AssertionFn`]. The returned future is boxed locally because
/// `TestAppContext` is not `Send`.
impl<F> AssertionFn for F
where
    F: 'static
        + Send
        + Sync
        + AsyncFn(
            &EvalSample,
            Arc<dyn LanguageModel>,
            &mut TestAppContext,
        ) -> Result<EvalAssertionOutcome>,
{
    fn assert<'a>(
        &'a self,
        sample: &'a EvalSample,
        judge_model: Arc<dyn LanguageModel>,
        cx: &'a mut TestAppContext,
    ) -> LocalBoxFuture<'a, Result<EvalAssertionOutcome>> {
        // Invoke the closure and erase the future type.
        (self)(sample, judge_model, cx).boxed_local()
    }
}
109
/// Cloneable handle to an assertion closure; constructed via the helpers on
/// the `impl` below (`assert_eq`, `assert_diff_any`, `judge_diff`).
#[derive(Clone)]
struct EvalAssertion(Arc<dyn AssertionFn>);
112
impl EvalAssertion {
    /// Wraps an async closure as an assertion.
    fn new<F>(f: F) -> Self
    where
        F: 'static
            + Send
            + Sync
            + AsyncFn(
                &EvalSample,
                Arc<dyn LanguageModel>,
                &mut TestAppContext,
            ) -> Result<EvalAssertionOutcome>,
    {
        EvalAssertion(Arc::new(f))
    }

    /// Scores 100 when the edited text equals `expected`, ignoring blank
    /// lines on both sides; 0 otherwise. No judge model involved.
    fn assert_eq(expected: impl Into<String>) -> Self {
        let expected = expected.into();
        Self::new(async move |sample, _judge, _cx| {
            Ok(EvalAssertionOutcome {
                score: if strip_empty_lines(&sample.text_after) == strip_empty_lines(&expected) {
                    100
                } else {
                    0
                },
                message: None,
            })
        })
    }

    /// Scores 100 when applying ANY of `expected_diffs` to the original text
    /// reproduces the edited text (blank lines ignored); 0 otherwise.
    /// Diffs that fail to apply simply don't match.
    fn assert_diff_any(expected_diffs: Vec<impl Into<String>>) -> Self {
        let expected_diffs: Vec<String> = expected_diffs.into_iter().map(Into::into).collect();
        Self::new(async move |sample, _judge, _cx| {
            let matches = expected_diffs.iter().any(|possible_diff| {
                language::apply_diff_patch(&sample.text_before, possible_diff)
                    .map(|expected| {
                        strip_empty_lines(&expected) == strip_empty_lines(&sample.text_after)
                    })
                    // A diff that doesn't apply cleanly is treated as a non-match.
                    .unwrap_or(false)
            });

            Ok(EvalAssertionOutcome {
                score: if matches { 100 } else { 0 },
                message: None,
            })
        })
    }

    /// Asks the judge model to grade the sample's diff against `assertions`,
    /// using the `diff_judge.hbs` prompt. The judge's response must contain a
    /// `<score>N</score>` tag; the full raw output is kept as the message.
    fn judge_diff(assertions: &'static str) -> Self {
        Self::new(async move |sample, judge, cx| {
            let prompt = DiffJudgeTemplate {
                diff: sample.diff.clone(),
                assertions,
            }
            .render(&Templates::new())
            .context("Failed to render diff judge template")?;

            let request = LanguageModelRequest {
                messages: vec![LanguageModelRequestMessage {
                    role: Role::User,
                    content: vec![prompt.into()],
                    cache: false,
                    reasoning_details: None,
                }],
                thinking_allowed: true,
                // Use the judge's own default effort level, when it has one.
                thinking_effort: judge
                    .default_effort_level()
                    .map(|effort_level| effort_level.value.to_string()),
                ..Default::default()
            };
            // Judges hit the same providers as the agent, so apply the same
            // rate-limit retry policy.
            let mut response = retry_on_rate_limit(async || {
                Ok(judge
                    .stream_completion_text(request.clone(), &cx.to_async())
                    .await?)
            })
            .await?;
            // Accumulate the streamed text into a single output string.
            let mut output = String::new();
            while let Some(chunk) = response.stream.next().await {
                let chunk = chunk?;
                output.push_str(&chunk);
            }

            let re = regex::Regex::new(r"<score>(\d+)</score>")
                .context("Failed to compile score regex")?;
            if let Some(captures) = re.captures(&output)
                && let Some(score_match) = captures.get(1)
            {
                // `unwrap_or(0)` covers digit strings too large for the
                // score's integer type.
                let score = score_match.as_str().parse().unwrap_or(0);
                return Ok(EvalAssertionOutcome {
                    score,
                    message: Some(output),
                });
            }

            anyhow::bail!("No score found in response. Raw output: {output}");
        })
    }

    /// Runs the stored assertion against `input`.
    async fn run(
        &self,
        input: &EvalSample,
        judge_model: Arc<dyn LanguageModel>,
        cx: &mut TestAppContext,
    ) -> Result<EvalAssertionOutcome> {
        self.0.assert(input, judge_model, cx).await
    }
}
219
/// Everything produced by one eval run: the sample (texts, tool input, diff)
/// plus the assertion's score/message. Displayed as a human-readable report.
#[derive(Clone)]
struct StreamingEditEvalOutput {
    sample: EvalSample,
    assertion: EvalAssertionOutcome,
}
225
226impl Display for StreamingEditEvalOutput {
227 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
228 writeln!(f, "Score: {:?}", self.assertion.score)?;
229 if let Some(message) = self.assertion.message.as_ref() {
230 writeln!(f, "Message: {}", message)?;
231 }
232 writeln!(f, "Diff:\n{}", self.sample.diff)?;
233 writeln!(f, "Tool Input:\n{:#?}", self.sample.tool_input)?;
234 Ok(())
235 }
236}
237
/// Result of grading a sample. Exact-match assertions produce 0 or 100;
/// `judge_diff` parses the score out of the judge's `<score>` tag.
/// `run_eval` treats anything below 80 as a failure.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
struct EvalAssertionOutcome {
    score: usize,
    // Optional explanation — the judge model's raw output, when one ran.
    message: Option<String>,
}
243
/// Test fixture for streaming-edit evals: a fake filesystem and project plus
/// the agent model under test and the judge model used for grading.
struct StreamingEditToolTest {
    fs: Arc<FakeFs>,
    project: Entity<Project>,
    // Model that performs the edit.
    model: Arc<dyn LanguageModel>,
    // Model that grades diffs in `judge_diff` assertions.
    judge_model: Arc<dyn LanguageModel>,
    // Cached default thinking effort of the agent model, if any.
    model_thinking_effort: Option<String>,
}
251
impl StreamingEditToolTest {
    /// Builds the fixture: fake fs + project rooted at `/root`, a real HTTP
    /// client, and the agent/judge models selected via the `ZED_AGENT_MODEL`
    /// and `ZED_JUDGE_MODEL` env vars (both defaulting to
    /// `anthropic/claude-sonnet-4-6-latest`). All providers are authenticated
    /// up front so model loading doesn't race authentication.
    async fn new(cx: &mut TestAppContext) -> Self {
        // Evals block on real network calls, so parking must be allowed.
        cx.executor().allow_parking();

        let fs = FakeFs::new(cx.executor());
        cx.update(|cx| {
            let settings_store = SettingsStore::test(cx);
            cx.set_global(settings_store);
            // Disable save-time formatting/newline insertion so assertions
            // compare exactly what the tool wrote.
            SettingsStore::update_global(cx, |store: &mut SettingsStore, cx| {
                store.update_user_settings(cx, |settings| {
                    settings
                        .project
                        .all_languages
                        .defaults
                        .ensure_final_newline_on_save = Some(false);
                    settings.project.all_languages.defaults.format_on_save =
                        Some(FormatOnSave::Off);
                });
            });

            gpui_tokio::init(cx);
            let http_client = Arc::new(ReqwestClient::user_agent("agent tests").unwrap());
            cx.set_http_client(http_client);
            let client = Client::production(cx);
            let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
            language_model::init(user_store.clone(), client.clone(), cx);
            language_models::init(user_store, client, cx);
        });

        fs.insert_tree("/root", json!({})).await;
        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
        let agent_model = SelectedModel::from_str(
            &std::env::var("ZED_AGENT_MODEL")
                .unwrap_or("anthropic/claude-sonnet-4-6-latest".into()),
        )
        .unwrap();
        let judge_model = SelectedModel::from_str(
            &std::env::var("ZED_JUDGE_MODEL")
                .unwrap_or("anthropic/claude-sonnet-4-6-latest".into()),
        )
        .unwrap();

        // Kick off authentication for every registered provider...
        let authenticate_provider_tasks = cx.update(|cx| {
            LanguageModelRegistry::global(cx).update(cx, |registry, cx| {
                registry
                    .providers()
                    .iter()
                    .map(|p| p.authenticate(cx))
                    .collect::<Vec<_>>()
            })
        });
        // ...and only resolve the two selected models once all of them finish.
        let (model, judge_model) = cx
            .update(|cx| {
                cx.spawn(async move |cx| {
                    futures::future::join_all(authenticate_provider_tasks).await;
                    let model = Self::load_model(&agent_model, cx).await;
                    let judge_model = Self::load_model(&judge_model, cx).await;
                    (model.unwrap(), judge_model.unwrap())
                })
            })
            .await;

        let model_thinking_effort = model
            .default_effort_level()
            .map(|effort_level| effort_level.value.to_string());

        Self {
            fs,
            project,
            model,
            judge_model,
            model_thinking_effort,
        }
    }

    /// Resolves a `SelectedModel` (provider + model id) from the global
    /// registry, re-authenticating its provider first. Panics if the provider
    /// or model is not registered.
    async fn load_model(
        selected_model: &SelectedModel,
        cx: &mut AsyncApp,
    ) -> Result<Arc<dyn LanguageModel>> {
        cx.update(|cx| {
            let registry = LanguageModelRegistry::read_global(cx);
            let provider = registry
                .provider(&selected_model.provider)
                .expect("Provider not found");
            provider.authenticate(cx)
        })
        .await?;
        Ok(cx.update(|cx| {
            let models = LanguageModelRegistry::read_global(cx);
            models
                .available_models(cx)
                .find(|model| {
                    model.provider_id() == selected_model.provider
                        && model.id() == selected_model.model
                })
                .unwrap_or_else(|| panic!("Model {} not found", selected_model.model.0))
        }))
    }

    /// Build the tool definitions for the model, replacing `edit_file` with the
    /// streaming edit file tool schema. In production the streaming tool is
    /// exposed under the name `"edit_file"` (see `Thread::enabled_tools`), so
    /// the model has never seen the name `"streaming_edit_file"`.
    fn build_tools() -> Vec<LanguageModelRequestTool> {
        let mut tools: Vec<LanguageModelRequestTool> = crate::built_in_tools()
            // Drop the non-streaming edit tool so both never coexist.
            .filter(|tool| tool.name != EditFileTool::NAME)
            .collect();
        tools.push(LanguageModelRequestTool {
            name: EditFileTool::NAME.to_string(),
            description: StreamingEditFileTool::description().to_string(),
            input_schema: StreamingEditFileTool::input_schema(
                LanguageModelToolSchemaFormat::JsonSchema,
            )
            .to_value(),
            use_input_streaming: StreamingEditFileTool::supports_input_streaming(),
        });
        tools
    }

    /// Runs one eval case end to end: seeds the FakeFs, sends the recorded
    /// conversation (prepending a rendered system prompt unless the case
    /// already has one), extracts the model's edit_file tool call, executes
    /// the streaming edit tool for real, and grades the resulting text with
    /// the case's assertion.
    async fn eval(
        &self,
        mut eval: EvalInput,
        cx: &mut TestAppContext,
    ) -> Result<StreamingEditEvalOutput> {
        // Cache up to the last recorded message for prompt-caching providers.
        eval.conversation
            .last_mut()
            .context("Conversation must not be empty")?
            .cache = true;

        // Populate the FakeFs so `resolve_path` / `entry_for_path` can find
        // the file in the worktree.
        if let Some(input_content) = eval.input_content.as_deref() {
            let abs_path = Path::new("/root").join(
                eval.input_file_path
                    .strip_prefix("root")
                    .unwrap_or(&eval.input_file_path),
            );
            self.fs.insert_file(&abs_path, input_content.into()).await;

            // Wait for the worktree to pick up the new file.
            cx.run_until_parked();
        }

        let tools = Self::build_tools();

        // Render the production system prompt against a single synthetic
        // worktree named "root".
        let system_prompt = {
            let worktrees = vec![WorktreeContext {
                root_name: "root".to_string(),
                abs_path: Path::new("/path/to/root").into(),
                rules_file: None,
            }];
            let project_context = ProjectContext::new(worktrees, Vec::default());
            let tool_names = tools
                .iter()
                .map(|tool| tool.name.clone().into())
                .collect::<Vec<_>>();
            let template = crate::SystemPromptTemplate {
                project: &project_context,
                available_tools: tool_names,
                model_name: None,
            };
            let templates = Templates::new();
            template.render(&templates)?
        };

        // Respect a system prompt baked into the case; otherwise prepend ours.
        let has_system_prompt = eval
            .conversation
            .first()
            .is_some_and(|msg| msg.role == Role::System);
        let messages = if has_system_prompt {
            eval.conversation
        } else {
            [LanguageModelRequestMessage {
                role: Role::System,
                content: vec![MessageContent::Text(system_prompt)],
                cache: true,
                reasoning_details: None,
            }]
            .into_iter()
            .chain(eval.conversation)
            .collect::<Vec<_>>()
        };

        let request = LanguageModelRequest {
            messages,
            tools,
            thinking_allowed: true,
            thinking_effort: self.model_thinking_effort.clone(),
            ..Default::default()
        };

        // The model will call the tool as "edit_file" (the production-visible
        // name), but the schema is from StreamingEditFileTool.
        let tool_input =
            retry_on_rate_limit(async || self.extract_tool_use(request.clone(), cx).await).await?;

        let language_registry = self
            .project
            .read_with(cx, |project, _cx| project.languages().clone());

        // Minimal Thread so the tool has an action log to write to.
        let context_server_registry = cx
            .new(|cx| ContextServerRegistry::new(self.project.read(cx).context_server_store(), cx));
        let thread = cx.new(|cx| {
            Thread::new(
                self.project.clone(),
                cx.new(|_cx| ProjectContext::default()),
                context_server_registry,
                Templates::new(),
                Some(self.model.clone()),
                cx,
            )
        });
        let action_log = thread.read_with(cx, |thread, _| thread.action_log().clone());

        let tool = Arc::new(StreamingEditFileTool::new(
            self.project.clone(),
            thread.downgrade(),
            action_log,
            language_registry,
        ));

        // Execute the tool with the already-parsed (resolved) input.
        let result = cx
            .update(|cx| {
                tool.clone().run(
                    ToolInput::resolved(tool_input.clone()),
                    ToolCallEventStream::test().0,
                    cx,
                )
            })
            .await;

        let output = match result {
            Ok(output) => output,
            Err(output) => {
                anyhow::bail!("Tool returned error: {}", output);
            }
        };

        let StreamingEditFileToolOutput::Success { new_text, .. } = &output else {
            anyhow::bail!("Tool returned error output: {}", output);
        };

        let sample = EvalSample {
            tool_input,
            diff: language::unified_diff(
                eval.input_content.as_deref().unwrap_or_default(),
                new_text,
            ),
            text_before: eval.input_content.unwrap_or_default(),
            text_after: new_text.clone(),
        };

        let assertion = eval
            .assertion
            .run(&sample, self.judge_model.clone(), cx)
            .await?;

        Ok(StreamingEditEvalOutput { assertion, sample })
    }

    /// Stream the model completion and extract the first complete tool use
    /// whose name matches `EditFileTool::NAME` (the production-visible name
    /// for the streaming edit tool), parsed as `StreamingEditFileToolInput`.
    /// If the stream ends without one, bails with whatever diagnostic context
    /// was collected (stop reason, tool JSON parse errors, streamed text).
    async fn extract_tool_use(
        &self,
        request: LanguageModelRequest,
        cx: &mut TestAppContext,
    ) -> Result<StreamingEditFileToolInput> {
        let model = self.model.clone();
        let events = cx
            .update(|cx| {
                let async_cx = cx.to_async();
                cx.foreground_executor()
                    .spawn(async move { model.stream_completion(request, &async_cx).await })
            })
            .await
            .map_err(|err| anyhow::anyhow!("completion error: {}", err))?;

        // Diagnostics accumulated in case no tool use ever arrives.
        let mut streamed_text = String::new();
        let mut stop_reason = None;
        let mut parse_errors = Vec::new();

        let mut events = events.fuse();
        while let Some(event) = events.next().await {
            match event {
                Ok(LanguageModelCompletionEvent::ToolUse(tool_use))
                    if tool_use.is_input_complete
                        && tool_use.name.as_ref() == EditFileTool::NAME =>
                {
                    let input: StreamingEditFileToolInput = serde_json::from_value(tool_use.input)
                        .context("Failed to parse tool input as StreamingEditFileToolInput")?;
                    return Ok(input);
                }
                Ok(LanguageModelCompletionEvent::Text(text)) => {
                    // Cap captured text at ~2 KB to keep error messages short.
                    if streamed_text.len() < 2_000 {
                        streamed_text.push_str(&text);
                    }
                }
                Ok(LanguageModelCompletionEvent::Stop(reason)) => {
                    stop_reason = Some(reason);
                }
                Ok(LanguageModelCompletionEvent::ToolUseJsonParseError {
                    tool_name,
                    raw_input,
                    json_parse_error,
                    ..
                }) if tool_name.as_ref() == EditFileTool::NAME => {
                    parse_errors.push(format!("{json_parse_error}\nRaw input:\n{raw_input:?}"));
                }
                Err(err) => {
                    return Err(anyhow::anyhow!("completion error: {}", err));
                }
                _ => {}
            }
        }

        // No matching tool use: assemble a diagnostic error message.
        let streamed_text = streamed_text.trim();
        let streamed_text_suffix = if streamed_text.is_empty() {
            String::new()
        } else {
            format!("\nStreamed text:\n{streamed_text}")
        };
        let stop_reason_suffix = stop_reason
            .map(|reason| format!("\nStop reason: {reason:?}"))
            .unwrap_or_default();
        let parse_errors_suffix = if parse_errors.is_empty() {
            String::new()
        } else {
            format!("\nTool parse errors:\n{}", parse_errors.join("\n"))
        };

        anyhow::bail!(
            "Stream ended without an edit_file tool use{stop_reason_suffix}{parse_errors_suffix}{streamed_text_suffix}"
        )
    }
}
588
/// Runs one eval case on a fresh, randomly-seeded test dispatcher and maps
/// the result to an `eval_utils::EvalOutput`: scores below 80 fail, errors
/// are reported as `OutcomeKind::Error`.
fn run_eval(eval: EvalInput) -> eval_utils::EvalOutput<()> {
    // Random seed so repeated runs exercise different schedules.
    let dispatcher = gpui::TestDispatcher::new(rand::random());
    let mut cx = TestAppContext::build(dispatcher, None);
    let foreground_executor = cx.foreground_executor().clone();
    let result = foreground_executor.block_test(async {
        let test = StreamingEditToolTest::new(&mut cx).await;
        let result = test.eval(eval, &mut cx).await;
        // Drop the fixture and drain pending tasks before quitting the app.
        drop(test);
        cx.run_until_parked();
        result
    });
    cx.quit();
    match result {
        Ok(output) => eval_utils::EvalOutput {
            data: output.to_string(),
            // 80 is the pass threshold for assertion scores.
            outcome: if output.assertion.score < 80 {
                eval_utils::OutcomeKind::Failed
            } else {
                eval_utils::OutcomeKind::Passed
            },
            metadata: (),
        },
        Err(err) => eval_utils::EvalOutput {
            data: format!("{err:?}"),
            outcome: eval_utils::OutcomeKind::Error,
            metadata: (),
        },
    }
}
618
619fn message(
620 role: Role,
621 contents: impl IntoIterator<Item = MessageContent>,
622) -> LanguageModelRequestMessage {
623 LanguageModelRequestMessage {
624 role,
625 content: contents.into_iter().collect(),
626 cache: false,
627 reasoning_details: None,
628 }
629}
630
631fn text(text: impl Into<String>) -> MessageContent {
632 MessageContent::Text(text.into())
633}
634
/// Returns the lines of `input` whose zero-based indices fall inside `range`,
/// joined with "\n". Out-of-bounds or empty/inverted ranges simply yield
/// fewer (or no) lines.
fn lines(input: &str, range: std::ops::Range<usize>) -> String {
    let mut selected = Vec::new();
    for (index, line) in input.lines().enumerate() {
        if index >= range.end {
            break;
        }
        if index >= range.start {
            selected.push(line);
        }
    }
    selected.join("\n")
}
643
644fn tool_use(
645 id: impl Into<Arc<str>>,
646 name: impl Into<Arc<str>>,
647 input: impl Serialize,
648) -> MessageContent {
649 MessageContent::ToolUse(LanguageModelToolUse {
650 id: LanguageModelToolUseId::from(id.into()),
651 name: name.into(),
652 raw_input: serde_json::to_string_pretty(&input).unwrap(),
653 input: serde_json::to_value(input).unwrap(),
654 is_input_complete: true,
655 thought_signature: None,
656 })
657}
658
659fn tool_result(
660 id: impl Into<Arc<str>>,
661 name: impl Into<Arc<str>>,
662 result: impl Into<Arc<str>>,
663) -> MessageContent {
664 MessageContent::ToolResult(LanguageModelToolResult {
665 tool_use_id: LanguageModelToolUseId::from(id.into()),
666 tool_name: name.into(),
667 is_error: false,
668 content: LanguageModelToolResultContent::Text(result.into()),
669 output: None,
670 })
671}
672
/// Removes all blank (whitespace-only) lines from `text` and rejoins the
/// remaining lines with "\n". Non-blank lines keep their own whitespace.
fn strip_empty_lines(text: &str) -> String {
    let mut result = String::new();
    for line in text.lines() {
        if line.trim().is_empty() {
            continue;
        }
        if !result.is_empty() {
            result.push('\n');
        }
        result.push_str(line);
    }
    result
}
679
/// Runs `request`, retrying (up to 20 attempts total) on transient
/// `LanguageModelCompletionError`s:
/// - RateLimitExceeded / ServerOverloaded: waits the server-provided
///   `retry_after`, defaulting to 5s;
/// - UpstreamProviderError with 429, 503, or 529: same 5s-default wait;
/// - ApiReadResponseError / ApiInternalServerError / HttpSend: exponential
///   backoff (1s, 2s, 4s, …) capped at 30s.
/// Each wait adds random jitter of up to 100% of the delay. Any other error
/// (or success) is returned immediately.
async fn retry_on_rate_limit<R>(mut request: impl AsyncFnMut() -> Result<R>) -> Result<R> {
    const MAX_RETRIES: usize = 20;
    let mut attempt = 0;

    loop {
        attempt += 1;
        let response = request().await;

        // Out of attempts: return whatever we got, success or not.
        if attempt >= MAX_RETRIES {
            return response;
        }

        // Classify the error; `Some(delay)` means retry after that delay.
        let retry_delay = match &response {
            Ok(_) => None,
            Err(err) => match err.downcast_ref::<LanguageModelCompletionError>() {
                Some(err) => match &err {
                    LanguageModelCompletionError::RateLimitExceeded { retry_after, .. }
                    | LanguageModelCompletionError::ServerOverloaded { retry_after, .. } => {
                        Some(retry_after.unwrap_or(Duration::from_secs(5)))
                    }
                    LanguageModelCompletionError::UpstreamProviderError {
                        status,
                        retry_after,
                        ..
                    } => {
                        // 529 is Anthropic's non-standard "overloaded" status.
                        let should_retry = matches!(
                            *status,
                            StatusCode::TOO_MANY_REQUESTS | StatusCode::SERVICE_UNAVAILABLE
                        ) || status.as_u16() == 529;

                        if should_retry {
                            Some(retry_after.unwrap_or(Duration::from_secs(5)))
                        } else {
                            None
                        }
                    }
                    LanguageModelCompletionError::ApiReadResponseError { .. }
                    | LanguageModelCompletionError::ApiInternalServerError { .. }
                    | LanguageModelCompletionError::HttpSend { .. } => {
                        // Exponential backoff in seconds, capped at 30s.
                        Some(Duration::from_secs(2_u64.pow((attempt - 1) as u32).min(30)))
                    }
                    _ => None,
                },
                _ => None,
            },
        };

        if let Some(retry_after) = retry_delay {
            // Jitter: a random fraction (0..1) of the base delay, added on top.
            let jitter = retry_after.mul_f64(rand::rng().random_range(0.0..1.0));
            eprintln!("Attempt #{attempt}: Retry after {retry_after:?} + jitter of {jitter:?}");
            #[allow(clippy::disallowed_methods)]
            smol::Timer::after(retry_after + jitter).await;
        } else {
            return response;
        }
    }
}
737
/// Eval: ask the model to delete the `run_git_blame` function (and nothing
/// else) from a fixture file. Graded by exact-diff match against either the
/// canonical `after.rs` fixture or a variant where two constants that become
/// unused are also removed.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_delete_function() {
    let input_file_path = "root/blame.rs";
    let input_file_content = include_str!("fixtures/delete_run_git_blame/before.rs");
    let output_file_content = include_str!("fixtures/delete_run_git_blame/after.rs");
    let possible_diffs = vec![
        language::unified_diff(input_file_content, output_file_content),
        // Also accept a version that removes the now-unused error constants.
        language::unified_diff(
            input_file_content,
            &output_file_content
                .replace(
                    "const GIT_BLAME_NO_COMMIT_ERROR: &str = \"fatal: no such ref: HEAD\";\n",
                    "",
                )
                .replace(
                    "const GIT_BLAME_NO_PATH: &str = \"fatal: no such path\";\n",
                    "",
                ),
        ),
    ];

    // 100 iterations at a 0.95 threshold — presumably the required pass rate;
    // confirm against eval_utils::eval's signature.
    eval_utils::eval(100, 0.95, eval_utils::NoProcessor, move || {
        run_eval(EvalInput::new(
            vec![
                message(
                    User,
                    [text(indoc::formatdoc! {"
                        Read the `{input_file_path}` file and delete `run_git_blame`. Just that
                        one function, not its usages.
                    "})],
                ),
                // Pre-recorded read of the whole file, so the model already
                // has the contents in context.
                message(
                    Assistant,
                    [tool_use(
                        "tool_1",
                        ReadFileTool::NAME,
                        ReadFileToolInput {
                            path: input_file_path.into(),
                            start_line: None,
                            end_line: None,
                        },
                    )],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_1",
                        ReadFileTool::NAME,
                        input_file_content,
                    )],
                ),
            ],
            input_file_path,
            Some(input_file_content.into()),
            EvalAssertion::assert_diff_any(possible_diffs.clone()),
        ))
    });
}
797
/// Eval: ask the model to extract a `handle_command_output` method from the
/// final stanza of `run_git_blame`, verbatim and undocumented. Graded by
/// exact-diff match against any of nine acceptable fixture diffs.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_extract_handle_command_output() {
    let input_file_path = "root/blame.rs";
    let input_file_content = include_str!("fixtures/extract_handle_command_output/before.rs");
    let possible_diffs = vec![
        include_str!("fixtures/extract_handle_command_output/possible-01.diff"),
        include_str!("fixtures/extract_handle_command_output/possible-02.diff"),
        include_str!("fixtures/extract_handle_command_output/possible-03.diff"),
        include_str!("fixtures/extract_handle_command_output/possible-04.diff"),
        include_str!("fixtures/extract_handle_command_output/possible-05.diff"),
        include_str!("fixtures/extract_handle_command_output/possible-06.diff"),
        include_str!("fixtures/extract_handle_command_output/possible-07.diff"),
        include_str!("fixtures/extract_handle_command_output/possible-08.diff"),
        include_str!("fixtures/extract_handle_command_output/possible-09.diff"),
    ];

    eval_utils::eval(100, 0.95, eval_utils::NoProcessor, move || {
        run_eval(EvalInput::new(
            vec![
                message(
                    User,
                    [text(indoc::formatdoc! {"
                        Read the `{input_file_path}` file and extract a method in
                        the final stanza of `run_git_blame` to deal with command failures,
                        call it `handle_command_output` and take the std::process::Output as the only parameter.
                        Do not document the method and do not add any comments.

                        Add it right next to `run_git_blame` and copy it verbatim from `run_git_blame`.
                    "})],
                ),
                // Pre-recorded full-file read.
                message(
                    Assistant,
                    [tool_use(
                        "tool_1",
                        ReadFileTool::NAME,
                        ReadFileToolInput {
                            path: input_file_path.into(),
                            start_line: None,
                            end_line: None,
                        },
                    )],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_1",
                        ReadFileTool::NAME,
                        input_file_content,
                    )],
                ),
            ],
            input_file_path,
            Some(input_file_content.into()),
            EvalAssertion::assert_diff_any(possible_diffs.clone()),
        ))
    });
}
856
/// Eval: ask the model to translate every doc comment in a fixture file to
/// Italian, editing in place rather than rewriting the file. Graded by the
/// judge model rather than an exact diff, since many translations are valid.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_translate_doc_comments() {
    let input_file_path = "root/canvas.rs";
    let input_file_content = include_str!("fixtures/translate_doc_comments/before.rs");

    eval_utils::eval(200, 1., eval_utils::NoProcessor, move || {
        run_eval(EvalInput::new(
            vec![
                message(
                    User,
                    [text(indoc::formatdoc! {"
                        Read the `{input_file_path}` file and edit it (without overwriting it),
                        translating all the doc comments to italian.
                    "})],
                ),
                // Pre-recorded full-file read.
                message(
                    Assistant,
                    [tool_use(
                        "tool_1",
                        ReadFileTool::NAME,
                        ReadFileToolInput {
                            path: input_file_path.into(),
                            start_line: None,
                            end_line: None,
                        },
                    )],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_1",
                        ReadFileTool::NAME,
                        input_file_content,
                    )],
                ),
            ],
            input_file_path,
            Some(input_file_content.into()),
            EvalAssertion::judge_diff("Doc comments were translated to Italian"),
        ))
    });
}
900
/// Eval: ask the model to rewrite `compile_parser_to_wasm` to use wasi-sdk
/// instead of emscripten. The conversation replays three partial reads of a
/// large file (lines 971–1150), so the model must edit code it has only seen
/// in windows. Graded by the judge model against two assertions.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
    let input_file_path = "root/lib.rs";
    let input_file_content =
        include_str!("fixtures/use_wasi_sdk_in_compile_parser_to_wasm/before.rs");

    eval_utils::eval(100, 0.95, eval_utils::NoProcessor, move || {
        run_eval(EvalInput::new(
            vec![
                message(
                    User,
                    [text(indoc::formatdoc! {"
                        Read the `{input_file_path}` file and change `compile_parser_to_wasm` to use `wasi-sdk` instead of emscripten.
                        Use `ureq` to download the SDK for the current platform and architecture.
                        Extract the archive into a sibling of `lib` inside the `tree-sitter` directory in the cache_dir.
                        Compile the parser to wasm using the `bin/clang` executable (or `bin/clang.exe` on windows)
                        that's inside of the archive.
                        Don't re-download the SDK if that executable already exists.

                        Use these clang flags: -fPIC -shared -Os -Wl,--export=tree_sitter_{{language_name}}

                        Here are the available wasi-sdk assets:
                        - wasi-sdk-25.0-x86_64-macos.tar.gz
                        - wasi-sdk-25.0-arm64-macos.tar.gz
                        - wasi-sdk-25.0-x86_64-linux.tar.gz
                        - wasi-sdk-25.0-arm64-linux.tar.gz
                        - wasi-sdk-25.0-x86_64-linux.tar.gz
                        - wasi-sdk-25.0-arm64-linux.tar.gz
                        - wasi-sdk-25.0-x86_64-windows.tar.gz
                    "})],
                ),
                // Three pre-recorded windowed reads covering lines 971–1150.
                message(
                    Assistant,
                    [tool_use(
                        "tool_1",
                        ReadFileTool::NAME,
                        ReadFileToolInput {
                            path: input_file_path.into(),
                            start_line: Some(971),
                            end_line: Some(1050),
                        },
                    )],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_1",
                        ReadFileTool::NAME,
                        lines(input_file_content, 971..1050),
                    )],
                ),
                message(
                    Assistant,
                    [tool_use(
                        "tool_2",
                        ReadFileTool::NAME,
                        ReadFileToolInput {
                            path: input_file_path.into(),
                            start_line: Some(1050),
                            end_line: Some(1100),
                        },
                    )],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_2",
                        ReadFileTool::NAME,
                        lines(input_file_content, 1050..1100),
                    )],
                ),
                message(
                    Assistant,
                    [tool_use(
                        "tool_3",
                        ReadFileTool::NAME,
                        ReadFileToolInput {
                            path: input_file_path.into(),
                            start_line: Some(1100),
                            end_line: Some(1150),
                        },
                    )],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_3",
                        ReadFileTool::NAME,
                        lines(input_file_content, 1100..1150),
                    )],
                ),
            ],
            input_file_path,
            Some(input_file_content.into()),
            EvalAssertion::judge_diff(indoc::indoc! {"
                - The compile_parser_to_wasm method has been changed to use wasi-sdk
                - ureq is used to download the SDK for current platform and architecture
            "}),
        ))
    });
}
1003
/// Eval: after a replayed grep for "blink" whose result stitches together
/// scattered windows of a very large file, ask the model to comment out the
/// `BlinkManager` interactions. Graded by exact-diff match against four
/// acceptable fixture diffs. Note the lower 0.51 threshold for this case.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_disable_cursor_blinking() {
    let input_file_path = "root/editor.rs";
    let input_file_content = include_str!("fixtures/disable_cursor_blinking/before.rs");
    let possible_diffs = vec![
        include_str!("fixtures/disable_cursor_blinking/possible-01.diff"),
        include_str!("fixtures/disable_cursor_blinking/possible-02.diff"),
        include_str!("fixtures/disable_cursor_blinking/possible-03.diff"),
        include_str!("fixtures/disable_cursor_blinking/possible-04.diff"),
    ];

    eval_utils::eval(100, 0.51, eval_utils::NoProcessor, move || {
        run_eval(EvalInput::new(
            vec![
                message(User, [text("Let's research how to cursor blinking works.")]),
                // Pre-recorded grep for "blink"...
                message(
                    Assistant,
                    [tool_use(
                        "tool_1",
                        GrepTool::NAME,
                        GrepToolInput {
                            regex: "blink".into(),
                            include_pattern: None,
                            offset: 0,
                            case_sensitive: false,
                        },
                    )],
                ),
                // ...whose result is several disjoint line windows of the file.
                message(
                    User,
                    [tool_result(
                        "tool_1",
                        GrepTool::NAME,
                        [
                            lines(input_file_content, 100..400),
                            lines(input_file_content, 800..1300),
                            lines(input_file_content, 1600..2000),
                            lines(input_file_content, 5000..5500),
                            lines(input_file_content, 8000..9000),
                            lines(input_file_content, 18455..18470),
                            lines(input_file_content, 20000..20500),
                            lines(input_file_content, 21000..21300),
                        ]
                        .join("Match found:\n\n"),
                    )],
                ),
                message(
                    User,
                    [text(indoc::indoc! {"
                        Comment out the lines that interact with the BlinkManager.
                        Keep the outer `update` blocks, but comments everything that's inside (including if statements).
                        Don't add additional comments.
                    "})],
                ),
            ],
            input_file_path,
            Some(input_file_content.into()),
            EvalAssertion::assert_diff_any(possible_diffs.clone()),
        ))
    });
}
1066
/// Eval: ask the model to introduce a `from_pixels` constructor in `Canvas`
/// and add tests for it in the same file. The conversation replays a
/// recorded tool-use transcript (one full file read and three greps) before
/// the model edits; the resulting diff is scored by
/// `EvalAssertion::judge_diff` against two assertions.
/// Runs 100 samples requiring a 0.95 pass rate; ignored unless the
/// `unit-eval` feature is enabled.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_from_pixels_constructor() {
    let input_file_path = "root/canvas.rs";
    let input_file_content = include_str!("fixtures/from_pixels_constructor/before.rs");

    eval_utils::eval(100, 0.95, eval_utils::NoProcessor, move || {
        run_eval(EvalInput::new(
            vec![
                // The user request that kicks off the transcript.
                message(
                    User,
                    [text(indoc::indoc! {"
                        Introduce a new `from_pixels` constructor in Canvas and
                        also add tests for it in the same file.
                    "})],
                ),
                // Recorded assistant turn: read the entire input file.
                message(
                    Assistant,
                    [tool_use(
                        "tool_1",
                        ReadFileTool::NAME,
                        ReadFileToolInput {
                            path: input_file_path.into(),
                            start_line: None,
                            end_line: None,
                        },
                    )],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_1",
                        ReadFileTool::NAME,
                        input_file_content,
                    )],
                ),
                // Two grep attempts for an existing `mod tests` both come
                // back empty...
                message(
                    Assistant,
                    [tool_use(
                        "tool_2",
                        GrepTool::NAME,
                        GrepToolInput {
                            regex: "mod\\s+tests".into(),
                            include_pattern: Some("font-kit/src/canvas.rs".into()),
                            offset: 0,
                            case_sensitive: false,
                        },
                    )],
                ),
                message(
                    User,
                    [tool_result("tool_2", GrepTool::NAME, "No matches found")],
                ),
                message(
                    Assistant,
                    [tool_use(
                        "tool_3",
                        GrepTool::NAME,
                        GrepToolInput {
                            regex: "mod\\s+tests".into(),
                            include_pattern: Some("font-kit/src/**/*.rs".into()),
                            offset: 0,
                            case_sensitive: false,
                        },
                    )],
                ),
                message(
                    User,
                    [tool_result("tool_3", GrepTool::NAME, "No matches found")],
                ),
                // ...so the assistant greps for `#[test]` instead, which
                // surfaces existing test modules in sibling files to imitate.
                message(
                    Assistant,
                    [tool_use(
                        "tool_4",
                        GrepTool::NAME,
                        GrepToolInput {
                            regex: "#\\[test\\]".into(),
                            include_pattern: Some("font-kit/src/**/*.rs".into()),
                            offset: 0,
                            case_sensitive: false,
                        },
                    )],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_4",
                        GrepTool::NAME,
                        indoc::indoc! {"
                            Found 6 matches:

                            ## Matches in font-kit/src/loaders/core_text.rs

                            ### mod test › L926-936
                            ```
                            mod test {
                                use super::Font;
                                use crate::properties::{Stretch, Weight};

                                #[cfg(feature = \"source\")]
                                use crate::source::SystemSource;

                                static TEST_FONT_POSTSCRIPT_NAME: &'static str = \"ArialMT\";

                                #[cfg(feature = \"source\")]
                                #[test]
                            ```

                            55 lines remaining in ancestor node. Read the file to see all.

                            ### mod test › L947-951
                            ```
                            }

                            #[test]
                            fn test_core_text_to_css_font_weight() {
                                // Exact matches
                            ```

                            ### mod test › L959-963
                            ```
                            }

                            #[test]
                            fn test_core_text_to_css_font_stretch() {
                                // Exact matches
                            ```

                            ## Matches in font-kit/src/loaders/freetype.rs

                            ### mod test › L1238-1248
                            ```
                            mod test {
                                use crate::loaders::freetype::Font;

                                static PCF_FONT_PATH: &str = \"resources/tests/times-roman-pcf/timR12.pcf\";
                                static PCF_FONT_POSTSCRIPT_NAME: &str = \"Times-Roman\";

                                #[test]
                                fn get_pcf_postscript_name() {
                                    let font = Font::from_path(PCF_FONT_PATH, 0).unwrap();
                                    assert_eq!(font.postscript_name().unwrap(), PCF_FONT_POSTSCRIPT_NAME);
                                }
                            ```

                            1 lines remaining in ancestor node. Read the file to see all.

                            ## Matches in font-kit/src/sources/core_text.rs

                            ### mod test › L265-275
                            ```
                            mod test {
                                use crate::properties::{Stretch, Weight};

                                #[test]
                                fn test_css_to_core_text_font_weight() {
                                    // Exact matches
                                    assert_eq!(super::css_to_core_text_font_weight(Weight(100.0)), -0.7);
                                    assert_eq!(super::css_to_core_text_font_weight(Weight(400.0)), 0.0);
                                    assert_eq!(super::css_to_core_text_font_weight(Weight(700.0)), 0.4);
                                    assert_eq!(super::css_to_core_text_font_weight(Weight(900.0)), 0.8);

                            ```

                            27 lines remaining in ancestor node. Read the file to see all.

                            ### mod test › L278-282
                            ```
                            }

                            #[test]
                            fn test_css_to_core_text_font_stretch() {
                                // Exact matches
                            ```
                        "},
                    )],
                ),
            ],
            input_file_path,
            Some(input_file_content.into()),
            EvalAssertion::judge_diff(indoc::indoc! {"
                - The diff contains a new `from_pixels` constructor
                - The diff contains new tests for the `from_pixels` constructor
            "}),
        ))
    });
}
1254
/// Eval: drives the model with the `zode` fixture prompt and replays a
/// recorded transcript in which the assistant reads two Python fixtures in
/// a single turn. There is no pre-existing input content (`input_content`
/// is `None`), so the model is expected to produce `root/zode.py` itself.
/// The custom assertion checks only formatting hygiene of the produced
/// text. Runs 50 samples requiring a perfect (1.0) pass rate; ignored
/// unless the `unit-eval` feature is enabled.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_zode() {
    let input_file_path = "root/zode.py";
    let input_content = None;

    eval_utils::eval(50, 1., eval_utils::NoProcessor, move || {
        run_eval(EvalInput::new(
            vec![
                message(User, [text(include_str!("fixtures/zode/prompt.md"))]),
                // Recorded assistant turn issuing two file reads at once.
                message(
                    Assistant,
                    [
                        tool_use(
                            "tool_1",
                            ReadFileTool::NAME,
                            ReadFileToolInput {
                                path: "root/eval/react.py".into(),
                                start_line: None,
                                end_line: None,
                            },
                        ),
                        tool_use(
                            "tool_2",
                            ReadFileTool::NAME,
                            ReadFileToolInput {
                                path: "root/eval/react_test.py".into(),
                                start_line: None,
                                end_line: None,
                            },
                        ),
                    ],
                ),
                message(
                    User,
                    [
                        tool_result(
                            "tool_1",
                            ReadFileTool::NAME,
                            include_str!("fixtures/zode/react.py"),
                        ),
                        tool_result(
                            "tool_2",
                            ReadFileTool::NAME,
                            include_str!("fixtures/zode/react_test.py"),
                        ),
                    ],
                ),
            ],
            input_file_path,
            input_content.clone(),
            EvalAssertion::new(async move |sample, _, _cx| {
                // The produced text must not begin with indentation, a stray
                // markdown code fence, or a blank line.
                let invalid_starts = [' ', '`', '\n'];
                let mut message = String::new();
                // At most one violation is reported (the loop breaks after
                // the first match).
                for start in invalid_starts {
                    if sample.text_after.starts_with(start) {
                        message.push_str(&format!("The sample starts with a {:?}\n", start));
                        break;
                    }
                }
                // Trim the trailing newline; no-op when `message` is empty.
                message.pop();

                // All-or-nothing score: 100 when clean, 0 with an
                // explanation otherwise.
                if message.is_empty() {
                    Ok(EvalAssertionOutcome {
                        score: 100,
                        message: None,
                    })
                } else {
                    Ok(EvalAssertionOutcome {
                        score: 0,
                        message: Some(message),
                    })
                }
            }),
        ))
    });
}
1332
/// Eval: ask the model to add a file-overwrite test to `action_log.rs`
/// (a file exists, but `buffer_created` is called as if it were new).
/// The replayed transcript first returns a symbol outline of the file,
/// then three ranged reads of relevant sections; the diff is scored by
/// `EvalAssertion::judge_diff`. Runs 200 samples requiring a 0.5 pass
/// rate; ignored unless the `unit-eval` feature is enabled.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_add_overwrite_test() {
    let input_file_path = "root/action_log.rs";
    let input_file_content = include_str!("fixtures/add_overwrite_test/before.rs");

    eval_utils::eval(200, 0.5, eval_utils::NoProcessor, move || {
        run_eval(EvalInput::new(
            vec![
                message(
                    User,
                    [text(indoc::indoc! {"
                        Introduce a new test in `action_log.rs` to test overwriting a file.
                        That is, a file already exists, but we call `buffer_created` as if the file were new.
                        Take inspiration from all the other tests in the file.
                    "})],
                ),
                message(
                    Assistant,
                    [tool_use(
                        "tool_1",
                        ReadFileTool::NAME,
                        ReadFileToolInput {
                            path: input_file_path.into(),
                            start_line: None,
                            end_line: None,
                        },
                    )],
                ),
                // The whole-file read returns a symbol outline (names with
                // line ranges) rather than file text; the ranges below drive
                // the assistant's follow-up ranged reads.
                message(
                    User,
                    [tool_result(
                        "tool_1",
                        ReadFileTool::NAME,
                        indoc::indoc! {"
                            pub struct ActionLog [L13-20]
                                tracked_buffers [L15]
                                edited_since_project_diagnostics_check [L17]
                                project [L19]
                            impl ActionLog [L22-498]
                                pub fn new [L24-30]
                                pub fn project [L32-34]
                                pub fn checked_project_diagnostics [L37-39]
                                pub fn has_edited_files_since_project_diagnostics_check [L42-44]
                                fn track_buffer_internal [L46-101]
                                fn handle_buffer_event [L103-116]
                                fn handle_buffer_edited [L118-123]
                                fn handle_buffer_file_changed [L125-158]
                                async fn maintain_diff [L160-264]
                                pub fn buffer_read [L267-269]
                                pub fn buffer_created [L272-276]
                                pub fn buffer_edited [L279-287]
                                pub fn will_delete_buffer [L289-304]
                                pub fn keep_edits_in_range [L306-364]
                                pub fn reject_edits_in_ranges [L366-459]
                                pub fn keep_all_edits [L461-473]
                                pub fn changed_buffers [L476-482]
                                pub fn stale_buffers [L485-497]
                            fn apply_non_conflicting_edits [L500-561]
                            fn diff_snapshots [L563-585]
                            fn point_to_row_edit [L587-614]
                            enum ChangeAuthor [L617-620]
                                User [L618]
                                Agent [L619]
                            enum TrackedBufferStatus [L623-627]
                                Created [L624]
                                Modified [L625]
                                Deleted [L626]
                            struct TrackedBuffer [L629-641]
                                buffer [L630]
                                base_text [L631]
                                unreviewed_changes [L632]
                                status [L633]
                                version [L634]
                                diff [L635]
                                snapshot [L636]
                                diff_update [L637]
                                _open_lsp_handle [L638]
                                _maintain_diff [L639]
                                _subscription [L640]
                            impl TrackedBuffer [L643-657]
                                fn has_changes [L644-650]
                                fn schedule_diff_update [L652-656]
                            pub struct ChangedBuffer [L659-661]
                                pub diff [L660]
                            mod tests [L664-1574]
                                fn init_logger [L678-682]
                                fn init_test [L684-691]
                                async fn test_keep_edits [L694-769]
                                async fn test_deletions [L772-854]
                                async fn test_overlapping_user_edits [L857-951]
                                async fn test_creating_files [L954-1010]
                                async fn test_deleting_files [L1013-1120]
                                async fn test_reject_edits [L1123-1255]
                                async fn test_reject_multiple_edits [L1258-1331]
                                async fn test_reject_deleted_file [L1334-1388]
                                async fn test_reject_created_file [L1391-1443]
                                async fn test_random_diffs [L1446-1535]
                                    fn quiesce [L1510-1534]
                            struct HunkStatus [L1538-1542]
                                range [L1539]
                                diff_status [L1540]
                                old_text [L1541]
                            fn unreviewed_hunks [L1544-1573]

                            Showing symbols 1-69 (total symbols: 69)

                            Using the line numbers in this outline, you can call this tool again while specifying
                            the start_line and end_line fields to see the implementations of symbols in the outline.
                        "},
                    )],
                ),
                // Ranged read covering `test_creating_files` ([L954-1010] in
                // the outline); `lines` slices the fixture to fake the result.
                message(
                    Assistant,
                    [
                        text(
                            "Let's look at the existing test cases that are related to file creation to understand the pattern:",
                        ),
                        tool_use(
                            "tool_2",
                            ReadFileTool::NAME,
                            ReadFileToolInput {
                                path: input_file_path.into(),
                                start_line: Some(953),
                                end_line: Some(1010),
                            },
                        ),
                    ],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_2",
                        ReadFileTool::NAME,
                        lines(input_file_content, 953..1010),
                    )],
                ),
                // Ranged read covering `test_deleting_files` ([L1013-1120]).
                message(
                    Assistant,
                    [
                        text(
                            "Now let's look at another relevant test to understand how they're structured:",
                        ),
                        tool_use(
                            "tool_3",
                            ReadFileTool::NAME,
                            ReadFileToolInput {
                                path: input_file_path.into(),
                                start_line: Some(1012),
                                end_line: Some(1120),
                            },
                        ),
                    ],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_3",
                        ReadFileTool::NAME,
                        lines(input_file_content, 1012..1120),
                    )],
                ),
                // Ranged read covering `buffer_created` ([L272-276]) — the
                // method the requested test must exercise.
                message(
                    Assistant,
                    [
                        text("Now let's look at how `buffer_created` is implemented:"),
                        tool_use(
                            "tool_4",
                            ReadFileTool::NAME,
                            ReadFileToolInput {
                                path: input_file_path.into(),
                                start_line: Some(271),
                                end_line: Some(276),
                            },
                        ),
                    ],
                ),
                message(
                    User,
                    [tool_result(
                        "tool_4",
                        ReadFileTool::NAME,
                        lines(input_file_content, 271..276),
                    )],
                ),
            ],
            input_file_path,
            Some(input_file_content.into()),
            EvalAssertion::judge_diff(
                "A new test for overwritten files was created, without changing any previous test",
            ),
        ))
    });
}
1527
/// Eval: the model is asked to create a second empty todo file. The
/// recorded transcript shows the assistant listing the `root` directory,
/// which already contains `TODO` and `TODO2`, so the expected outcome is a
/// new, completely empty `root/TODO3` (asserted via `assert_eq` against an
/// empty string). Runs 100 samples requiring a 0.99 pass rate; ignored
/// unless the `unit-eval` feature is enabled.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_create_empty_file() {
    let input_file_path = "root/TODO3";
    // The target file does not exist yet; the model must create it.
    let input_file_content = None;
    let expected_output_content = String::new();

    eval_utils::eval(100, 0.99, eval_utils::NoProcessor, move || {
        run_eval(EvalInput::new(
            vec![
                message(User, [text("Create a second empty todo file ")]),
                message(
                    Assistant,
                    [
                        // No interpolation occurs in this literal, so use
                        // `indoc!` (a `&'static str`) like the other evals,
                        // instead of `formatdoc!`, which would needlessly
                        // run `format!` at runtime and allocate a `String`.
                        text(indoc::indoc! {"
                            I'll help you create a second empty todo file.
                            First, let me examine the project structure to see if there's already a todo file, which will help me determine the appropriate name and location for the second one.
                        "}),
                        tool_use(
                            "toolu_01GAF8TtsgpjKxCr8fgQLDgR",
                            ListDirectoryTool::NAME,
                            ListDirectoryToolInput {
                                path: "root".to_string(),
                            },
                        ),
                    ],
                ),
                message(
                    User,
                    [tool_result(
                        "toolu_01GAF8TtsgpjKxCr8fgQLDgR",
                        ListDirectoryTool::NAME,
                        "root/TODO\nroot/TODO2\nroot/new.txt\n",
                    )],
                ),
            ],
            input_file_path,
            input_file_content.clone(),
            // The produced file must be exactly empty.
            EvalAssertion::assert_eq(expected_output_content.clone()),
        ))
    });
}