evals: Make the maximum number of dialog turns configurable (#31680)

Oleksiy Syvokon created

Release Notes:

- N/A

Change summary

crates/eval/src/example.rs                          | 1 +
crates/eval/src/examples/add_arg_to_trait_method.rs | 1 +
crates/eval/src/examples/code_block_citations.rs    | 1 +
crates/eval/src/examples/comment_translation.rs     | 1 +
crates/eval/src/examples/file_search.rs             | 1 +
crates/eval/src/examples/mod.rs                     | 6 +++++-
crates/eval/src/examples/overwrite_file.rs          | 1 +
crates/eval/src/examples/planets.rs                 | 1 +
8 files changed, 12 insertions(+), 1 deletion(-)

Detailed changes

crates/eval/src/example.rs 🔗

@@ -49,6 +49,7 @@ pub struct ExampleMetadata {
     pub max_assertions: Option<usize>,
     pub profile_id: AgentProfileId,
     pub existing_thread_json: Option<String>,
+    pub max_turns: Option<u32>,
 }
 
 #[derive(Clone, Debug)]

crates/eval/src/examples/file_search.rs 🔗

@@ -19,6 +19,7 @@ impl Example for FileSearchExample {
             max_assertions: Some(3),
             profile_id: AgentProfileId::default(),
             existing_thread_json: None,
+            max_turns: None,
         }
     }
 

crates/eval/src/examples/mod.rs 🔗

@@ -82,6 +82,7 @@ impl DeclarativeExample {
             max_assertions: None,
             profile_id,
             existing_thread_json,
+            max_turns: base.max_turns,
         };
 
         Ok(DeclarativeExample {
@@ -124,6 +125,8 @@ pub struct ExampleToml {
     pub thread_assertions: BTreeMap<String, String>,
     #[serde(default)]
     pub existing_thread_path: Option<String>,
+    #[serde(default)]
+    pub max_turns: Option<u32>,
 }
 
 #[async_trait(?Send)]
@@ -134,7 +137,8 @@ impl Example for DeclarativeExample {
 
     async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
         cx.push_user_message(&self.prompt);
-        let _ = cx.run_to_end().await;
+        let max_turns = self.metadata.max_turns.unwrap_or(1000);
+        let _ = cx.run_turns(max_turns).await;
         Ok(())
     }
 

crates/eval/src/examples/overwrite_file.rs 🔗

@@ -31,6 +31,7 @@ impl Example for FileOverwriteExample {
             max_assertions: Some(1),
             profile_id: AgentProfileId::default(),
             existing_thread_json: Some(thread_json.to_string()),
+            max_turns: None,
         }
     }
 

crates/eval/src/examples/planets.rs 🔗

@@ -19,6 +19,7 @@ impl Example for Planets {
             max_assertions: None,
             profile_id: AgentProfileId::default(),
             existing_thread_json: None,
+            max_turns: None,
         }
     }