1use agent_settings::AgentProfileId;
2use anyhow::Result;
3use async_trait::async_trait;
4use markdown::PathWithRange;
5
6use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion, LanguageServer};
7
/// Eval example that asks the agent to show the method bodies of the `Tool`
/// trait and then checks that every fenced code block in the response carries
/// a path citation that matches the contents of the cited file.
pub struct CodeBlockCitations;
9
/// Markdown code fence delimiter (three backticks). Used to locate the opening
/// and closing of code blocks in the agent's responses, and interpolated into
/// assertion messages.
const FENCE: &str = "```";
11
12#[async_trait(?Send)]
13impl Example for CodeBlockCitations {
14 fn meta(&self) -> ExampleMetadata {
15 ExampleMetadata {
16 name: "code_block_citations".to_string(),
17 url: "https://github.com/zed-industries/zed.git".to_string(),
18 revision: "f69aeb6311dde3c0b8979c293d019d66498d54f2".to_string(),
19 language_server: Some(LanguageServer {
20 file_extension: "rs".to_string(),
21 allow_preexisting_diagnostics: false,
22 }),
23 max_assertions: None,
24 profile_id: AgentProfileId::default(),
25 existing_thread_json: None,
26 }
27 }
28
29 async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
30 const FILENAME: &str = "assistant_tool.rs";
31 cx.push_user_message(format!(
32 r#"
33 Show me the method bodies of all the methods of the `Tool` trait in {FILENAME}.
34
35 Please show each method in a separate code snippet.
36 "#
37 ));
38
39 // Verify that the messages all have the correct formatting.
40 let texts: Vec<String> = cx.run_to_end().await?.texts().collect();
41 let closing_fence = format!("\n{FENCE}");
42
43 for text in texts.iter() {
44 let mut text = text.as_str();
45
46 while let Some(index) = text.find(FENCE) {
47 // Advance text past the opening backticks.
48 text = &text[index + FENCE.len()..];
49
50 // Find the closing backticks.
51 let content_len = text.find(&closing_fence);
52
53 // Verify the citation format - e.g. ```path/to/foo.txt#L123-456
54 if let Some(citation_len) = text.find('\n') {
55 let citation = &text[..citation_len];
56
57 if let Ok(()) =
58 cx.assert(citation.contains("/"), format!("Slash in {citation:?}",))
59 {
60 let path_range = PathWithRange::new(citation);
61 let path = cx
62 .agent_thread()
63 .update(cx, |thread, cx| {
64 thread
65 .project()
66 .read(cx)
67 .find_project_path(path_range.path, cx)
68 })
69 .ok()
70 .flatten();
71
72 if let Ok(path) = cx.assert_some(path, format!("Valid path: {citation:?}"))
73 {
74 let buffer_text = {
75 let buffer = match cx.agent_thread().update(cx, |thread, cx| {
76 thread
77 .project()
78 .update(cx, |project, cx| project.open_buffer(path, cx))
79 }) {
80 Ok(buffer_task) => buffer_task.await.ok(),
81 Err(err) => {
82 cx.assert(
83 false,
84 format!("Expected Ok(buffer), not {err:?}"),
85 )
86 .ok();
87 break;
88 }
89 };
90
91 let Ok(buffer_text) = cx.assert_some(
92 buffer.and_then(|buffer| {
93 buffer.read_with(cx, |buffer, _| buffer.text()).ok()
94 }),
95 "Reading buffer text succeeded",
96 ) else {
97 continue;
98 };
99 buffer_text
100 };
101
102 if let Some(content_len) = content_len {
103 // + 1 because there's a newline character after the citation.
104 let start_index = citation.len() + 1;
105 let end_index = content_len.saturating_sub(start_index);
106
107 if cx
108 .assert(
109 start_index <= end_index,
110 "Code block had a valid citation",
111 )
112 .is_ok()
113 {
114 let content = &text[start_index..end_index];
115
116 // deindent (trim the start of each line) because sometimes the model
117 // chooses to deindent its code snippets for the sake of readability,
118 // which in markdown is not only reasonable but usually desirable.
119 cx.assert(
120 deindent(&buffer_text)
121 .trim()
122 .contains(deindent(&content).trim()),
123 "Code block content was found in file",
124 )
125 .ok();
126
127 if let Some(range) = path_range.range {
128 let start_line_index = range.start.line.saturating_sub(1);
129 let line_count =
130 range.end.line.saturating_sub(start_line_index);
131 let mut snippet = buffer_text
132 .lines()
133 .skip(start_line_index as usize)
134 .take(line_count as usize)
135 .collect::<Vec<&str>>()
136 .join("\n");
137
138 if let Some(start_col) = range.start.col {
139 snippet = snippet[start_col as usize..].to_string();
140 }
141
142 if let Some(end_col) = range.end.col {
143 let last_line = snippet.lines().last().unwrap();
144 snippet = snippet[..snippet.len() - last_line.len()
145 + end_col as usize]
146 .to_string();
147 }
148
149 // deindent (trim the start of each line) because sometimes the model
150 // chooses to deindent its code snippets for the sake of readability,
151 // which in markdown is not only reasonable but usually desirable.
152 cx.assert_eq(
153 deindent(snippet.as_str()).trim(),
154 deindent(content).trim(),
155 format!(
156 "Code block was at {:?}-{:?}",
157 range.start, range.end
158 ),
159 )
160 .ok();
161 }
162 }
163 }
164 }
165 }
166 } else {
167 cx.assert(
168 false,
169 format!("Opening {FENCE} did not have a newline anywhere after it."),
170 )
171 .ok();
172 }
173
174 if let Some(content_len) = content_len {
175 // Advance past the closing backticks
176 text = &text[content_len + FENCE.len()..];
177 } else {
178 // There were no closing backticks associated with these opening backticks.
179 cx.assert(
180 false,
181 "Code block opening had matching closing backticks.".to_string(),
182 )
183 .ok();
184
185 // There are no more code blocks to parse, so we're done.
186 break;
187 }
188 }
189 }
190
191 Ok(())
192 }
193
194 fn thread_assertions(&self) -> Vec<JudgeAssertion> {
195 vec![
196 JudgeAssertion {
197 id: "trait method bodies are shown".to_string(),
198 description:
199 "All method bodies of the Tool trait are shown."
200 .to_string(),
201 },
202 JudgeAssertion {
203 id: "code blocks used".to_string(),
204 description:
205 "All code snippets are rendered inside markdown code blocks (as opposed to any other formatting besides code blocks)."
206 .to_string(),
207 },
208 JudgeAssertion {
209 id: "code blocks use backticks".to_string(),
210 description:
211 format!("All markdown code blocks use backtick fences ({FENCE}) rather than indentation.")
212 }
213 ]
214 }
215}
216
/// Returns a copy of the given text with the leading whitespace removed from
/// every line. Lines are split with `str::lines` and re-joined with `\n`, so a
/// trailing newline is not preserved.
fn deindent(as_str: impl AsRef<str>) -> String {
    let source = as_str.as_ref();
    let mut deindented = String::with_capacity(source.len());

    for (line_index, line) in source.lines().enumerate() {
        // Separator goes between lines only, never after the last one.
        if line_index > 0 {
            deindented.push('\n');
        }
        deindented.push_str(line.trim_start());
    }

    deindented
}