1use agent_settings::AgentProfileId;
2use anyhow::Result;
3use async_trait::async_trait;
4use markdown::PathWithRange;
5
6use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion, LanguageServer};
7
8pub struct CodeBlockCitations;
9
10const FENCE: &str = "```";
11
12#[async_trait(?Send)]
13impl Example for CodeBlockCitations {
14 fn meta(&self) -> ExampleMetadata {
15 ExampleMetadata {
16 name: "code_block_citations".to_string(),
17 url: "https://github.com/zed-industries/zed.git".to_string(),
18 revision: "f69aeb6311dde3c0b8979c293d019d66498d54f2".to_string(),
19 language_server: Some(LanguageServer {
20 file_extension: "rs".to_string(),
21 allow_preexisting_diagnostics: false,
22 }),
23 max_assertions: None,
24 profile_id: AgentProfileId::default(),
25 existing_thread_json: None,
26 max_turns: None,
27 }
28 }
29
30 async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
31 const FILENAME: &str = "assistant_tool.rs";
32 cx.push_user_message(format!(
33 r#"
34 Show me the method bodies of all the methods of the `Tool` trait in {FILENAME}.
35
36 Please show each method in a separate code snippet.
37 "#
38 ));
39
40 // Verify that the messages all have the correct formatting.
41 let texts: Vec<String> = cx.run_to_end().await?.texts().collect();
42 let closing_fence = format!("\n{FENCE}");
43
44 for text in texts.iter() {
45 let mut text = text.as_str();
46
47 while let Some(index) = text.find(FENCE) {
48 // Advance text past the opening backticks.
49 text = &text[index + FENCE.len()..];
50
51 // Find the closing backticks.
52 let content_len = text.find(&closing_fence);
53
54 // Verify the citation format - e.g. ```path/to/foo.txt#L123-456
55 if let Some(citation_len) = text.find('\n') {
56 let citation = &text[..citation_len];
57
58 if let Ok(()) =
59 cx.assert(citation.contains("/"), format!("Slash in {citation:?}",))
60 {
61 let path_range = PathWithRange::new(citation);
62 let path = cx
63 .agent_thread()
64 .update(cx, |thread, cx| {
65 thread
66 .project()
67 .read(cx)
68 .find_project_path(path_range.path.as_ref(), cx)
69 })
70 .ok()
71 .flatten();
72
73 if let Ok(path) = cx.assert_some(path, format!("Valid path: {citation:?}"))
74 {
75 let buffer_text = {
76 let buffer = match cx.agent_thread().update(cx, |thread, cx| {
77 thread
78 .project()
79 .update(cx, |project, cx| project.open_buffer(path, cx))
80 }) {
81 Ok(buffer_task) => buffer_task.await.ok(),
82 Err(err) => {
83 cx.assert(
84 false,
85 format!("Expected Ok(buffer), not {err:?}"),
86 )
87 .ok();
88 break;
89 }
90 };
91
92 let Ok(buffer_text) = cx.assert_some(
93 buffer.and_then(|buffer| {
94 buffer.read_with(cx, |buffer, _| buffer.text()).ok()
95 }),
96 "Reading buffer text succeeded",
97 ) else {
98 continue;
99 };
100 buffer_text
101 };
102
103 if let Some(content_len) = content_len {
104 // + 1 because there's a newline character after the citation.
105 let start_index = citation.len() + 1;
106 let end_index = content_len.saturating_sub(start_index);
107
108 if cx
109 .assert(
110 start_index <= end_index,
111 "Code block had a valid citation",
112 )
113 .is_ok()
114 {
115 let content = &text[start_index..end_index];
116
117 // deindent (trim the start of each line) because sometimes the model
118 // chooses to deindent its code snippets for the sake of readability,
119 // which in markdown is not only reasonable but usually desirable.
120 cx.assert(
121 deindent(&buffer_text)
122 .trim()
123 .contains(deindent(&content).trim()),
124 "Code block content was found in file",
125 )
126 .ok();
127
128 if let Some(range) = path_range.range {
129 let start_line_index = range.start.line.saturating_sub(1);
130 let line_count =
131 range.end.line.saturating_sub(start_line_index);
132 let mut snippet = buffer_text
133 .lines()
134 .skip(start_line_index as usize)
135 .take(line_count as usize)
136 .collect::<Vec<&str>>()
137 .join("\n");
138
139 if let Some(start_col) = range.start.col {
140 snippet = snippet[start_col as usize..].to_string();
141 }
142
143 if let Some(end_col) = range.end.col {
144 let last_line = snippet.lines().last().unwrap();
145 snippet = snippet[..snippet.len() - last_line.len()
146 + end_col as usize]
147 .to_string();
148 }
149
150 // deindent (trim the start of each line) because sometimes the model
151 // chooses to deindent its code snippets for the sake of readability,
152 // which in markdown is not only reasonable but usually desirable.
153 cx.assert_eq(
154 deindent(snippet.as_str()).trim(),
155 deindent(content).trim(),
156 format!(
157 "Code block was at {:?}-{:?}",
158 range.start, range.end
159 ),
160 )
161 .ok();
162 }
163 }
164 }
165 }
166 }
167 } else {
168 cx.assert(
169 false,
170 format!("Opening {FENCE} did not have a newline anywhere after it."),
171 )
172 .ok();
173 }
174
175 if let Some(content_len) = content_len {
176 // Advance past the closing backticks
177 text = &text[content_len + FENCE.len()..];
178 } else {
179 // There were no closing backticks associated with these opening backticks.
180 cx.assert(
181 false,
182 "Code block opening had matching closing backticks.".to_string(),
183 )
184 .ok();
185
186 // There are no more code blocks to parse, so we're done.
187 break;
188 }
189 }
190 }
191
192 Ok(())
193 }
194
195 fn thread_assertions(&self) -> Vec<JudgeAssertion> {
196 vec![
197 JudgeAssertion {
198 id: "trait method bodies are shown".to_string(),
199 description:
200 "All method bodies of the Tool trait are shown."
201 .to_string(),
202 },
203 JudgeAssertion {
204 id: "code blocks used".to_string(),
205 description:
206 "All code snippets are rendered inside markdown code blocks (as opposed to any other formatting besides code blocks)."
207 .to_string(),
208 },
209 JudgeAssertion {
210 id: "code blocks use backticks".to_string(),
211 description:
212 format!("All markdown code blocks use backtick fences ({FENCE}) rather than indentation.")
213 }
214 ]
215 }
216}
217
/// Returns a copy of the input with the leading whitespace removed from every line.
///
/// Lines are re-joined with a single `\n`; a trailing line terminator in the input is
/// not reproduced in the output.
fn deindent(as_str: impl AsRef<str>) -> String {
    let source = as_str.as_ref();
    let mut flattened = String::with_capacity(source.len());

    for (line_number, line) in source.lines().enumerate() {
        // Separators go between lines only, never after the last one.
        if line_number > 0 {
            flattened.push('\n');
        }
        flattened.push_str(line.trim_start());
    }

    flattened
}