1use agent::GrepToolInput;
2use agent_settings::AgentProfileId;
3use anyhow::Result;
4use async_trait::async_trait;
5
6use crate::example::{Example, ExampleContext, ExampleMetadata};
7
/// Eval example that asks the agent to search for files containing `>` or `<`
/// and then checks that the regex passed to the `grep` tool contains no HTML
/// character entities.
pub struct GrepParamsEscapementExample;
9
10/*
11
This eval checks that the model doesn't HTML-escape characters like `<` and `>`
(e.g. as `&lt;` and `&gt;`) in tool parameters.
14
15 original +system_prompt change +tool description
16 claude-opus-4 89% 92% 97%+
17 claude-sonnet-4 100%
18 gpt-4.1-mini 100%
19 gemini-2.5-pro 98%
20
21*/
22
23#[async_trait(?Send)]
24impl Example for GrepParamsEscapementExample {
25 fn meta(&self) -> ExampleMetadata {
26 ExampleMetadata {
27 name: "grep_params_escapement".to_string(),
28 url: "https://github.com/octocat/hello-world".to_string(),
29 revision: "7fd1a60b01f91b314f59955a4e4d4e80d8edf11d".to_string(),
30 language_server: None,
31 max_assertions: Some(1),
32 profile_id: AgentProfileId::default(),
33 existing_thread_json: None,
34 max_turns: Some(2),
35 }
36 }
37
38 async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
39 let response = cx
40 .prompt_with_max_turns("Search for files containing the characters `>` or `<`", 2)
41 .await?;
42 let grep_input = response
43 .find_tool_call("grep")
44 .and_then(|tool_use| tool_use.parse_input::<GrepToolInput>().ok());
45
46 cx.assert_some(grep_input.as_ref(), "`grep` tool should be called")?;
47
48 cx.assert(
49 !contains_html_entities(&grep_input.unwrap().regex),
50 "Tool parameters should not be escaped",
51 )
52 }
53}
54
55fn contains_html_entities(pattern: &str) -> bool {
56 regex::Regex::new(r"&[a-zA-Z]+;|&#[0-9]+;|&#x[0-9a-fA-F]+;")
57 .unwrap()
58 .is_match(pattern)
59}