diff --git a/crates/eval/src/example.rs b/crates/eval/src/example.rs index cafc5d996f8f5ad33f3352948b206ecc7c82b05e..fa5e95807edd421aaca2e25daf70bb4d0826ac68 100644 --- a/crates/eval/src/example.rs +++ b/crates/eval/src/example.rs @@ -49,6 +49,7 @@ pub struct ExampleMetadata { pub max_assertions: Option, pub profile_id: AgentProfileId, pub existing_thread_json: Option, + pub max_turns: Option, } #[derive(Clone, Debug)] diff --git a/crates/eval/src/examples/add_arg_to_trait_method.rs b/crates/eval/src/examples/add_arg_to_trait_method.rs index b9f306f841ed537e7f238f633c2059a40a8e9fbd..9c538f926059eb3998eb725168905d148dccdc9d 100644 --- a/crates/eval/src/examples/add_arg_to_trait_method.rs +++ b/crates/eval/src/examples/add_arg_to_trait_method.rs @@ -22,6 +22,7 @@ impl Example for AddArgToTraitMethod { max_assertions: None, profile_id: AgentProfileId::default(), existing_thread_json: None, + max_turns: None, } } diff --git a/crates/eval/src/examples/code_block_citations.rs b/crates/eval/src/examples/code_block_citations.rs index f0c2074ce540efe69f1e4594370bf0c6769faeb6..2239ccdfddcc023fdae6f56bd91fd73c1f851ac6 100644 --- a/crates/eval/src/examples/code_block_citations.rs +++ b/crates/eval/src/examples/code_block_citations.rs @@ -23,6 +23,7 @@ impl Example for CodeBlockCitations { max_assertions: None, profile_id: AgentProfileId::default(), existing_thread_json: None, + max_turns: None, } } diff --git a/crates/eval/src/examples/comment_translation.rs b/crates/eval/src/examples/comment_translation.rs index 3a4999bc8554ebc04e8dc702ce20fc8441b2d8d5..b6c9f7376f05fdc38e9f8128c78eb1761bc59c37 100644 --- a/crates/eval/src/examples/comment_translation.rs +++ b/crates/eval/src/examples/comment_translation.rs @@ -17,6 +17,7 @@ impl Example for CommentTranslation { max_assertions: Some(1), profile_id: AgentProfileId::default(), existing_thread_json: None, + max_turns: None, } } diff --git a/crates/eval/src/examples/file_search.rs b/crates/eval/src/examples/file_search.rs index 9056326db9610aa5843b998a6c99646e4802ad44..f1a482a41a952e889b6053e90e9e243ed546d2db 100644 --- a/crates/eval/src/examples/file_search.rs +++ b/crates/eval/src/examples/file_search.rs @@ -19,6 +19,7 @@ impl Example for FileSearchExample { max_assertions: Some(3), profile_id: AgentProfileId::default(), existing_thread_json: None, + max_turns: None, } } diff --git a/crates/eval/src/examples/mod.rs b/crates/eval/src/examples/mod.rs index edf3265186eb4c16907c12bf344dd455885d2991..5968ee2fd0b599152d60702f3fc8baa045fe1e7f 100644 --- a/crates/eval/src/examples/mod.rs +++ b/crates/eval/src/examples/mod.rs @@ -82,6 +82,7 @@ impl DeclarativeExample { max_assertions: None, profile_id, existing_thread_json, + max_turns: base.max_turns, }; Ok(DeclarativeExample { @@ -124,6 +125,8 @@ pub struct ExampleToml { pub thread_assertions: BTreeMap, #[serde(default)] pub existing_thread_path: Option, + #[serde(default)] + pub max_turns: Option, } #[async_trait(?Send)] @@ -134,7 +137,8 @@ impl Example for DeclarativeExample { async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> { cx.push_user_message(&self.prompt); - let _ = cx.run_to_end().await; + let max_turns = self.metadata.max_turns.unwrap_or(1000); + let _ = cx.run_turns(max_turns).await; Ok(()) } diff --git a/crates/eval/src/examples/overwrite_file.rs b/crates/eval/src/examples/overwrite_file.rs index 57c83a40f72f832898f482db1e455e3ec4d25d62..df0b75294c31bf7ff365e96aea18c371b817e710 100644 --- a/crates/eval/src/examples/overwrite_file.rs +++ b/crates/eval/src/examples/overwrite_file.rs @@ -31,6 +31,7 @@ impl Example for FileOverwriteExample { max_assertions: Some(1), profile_id: AgentProfileId::default(), existing_thread_json: Some(thread_json.to_string()), + max_turns: None, } } diff --git a/crates/eval/src/examples/planets.rs b/crates/eval/src/examples/planets.rs index 9363c4ac9a9b21ddc496b9578565370b6bdee815..f3a69332d2c544479ca4f367699dc3def4d83370 100644 --- a/crates/eval/src/examples/planets.rs +++ b/crates/eval/src/examples/planets.rs @@ -19,6 +19,7 @@ impl Example for Planets { max_assertions: None, profile_id: AgentProfileId::default(), existing_thread_json: None, + max_turns: None, } }