1use anyhow::Result;
2use assistant_settings::AgentProfileId;
3use async_trait::async_trait;
4use markdown::PathWithRange;
5
6use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion, LanguageServer};
7
/// Eval example that asks the agent to show the method bodies of the `Tool` trait
/// and then checks the fenced code blocks in the agent's responses: each block's
/// opening fence must carry a path citation, and the block's content must match
/// the cited file.
pub struct CodeBlockCitations;
9
/// Markdown code fence marker (three backticks) searched for in agent responses.
const FENCE: &str = "```";
11
12#[async_trait(?Send)]
13impl Example for CodeBlockCitations {
14 fn meta(&self) -> ExampleMetadata {
15 ExampleMetadata {
16 name: "code_block_citations".to_string(),
17 url: "https://github.com/zed-industries/zed.git".to_string(),
18 revision: "f69aeb6311dde3c0b8979c293d019d66498d54f2".to_string(),
19 language_server: Some(LanguageServer {
20 file_extension: "rs".to_string(),
21 allow_preexisting_diagnostics: false,
22 }),
23 max_assertions: None,
24 profile_id: AgentProfileId::default(),
25 }
26 }
27
28 async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
29 const FILENAME: &str = "assistant_tool.rs";
30 cx.push_user_message(format!(
31 r#"
32 Show me the method bodies of all the methods of the `Tool` trait in {FILENAME}.
33
34 Please show each method in a separate code snippet.
35 "#
36 ));
37
38 // Verify that the messages all have the correct formatting.
39 let texts: Vec<String> = cx.run_to_end().await?.texts().collect();
40 let closing_fence = format!("\n{FENCE}");
41
42 for text in texts.iter() {
43 let mut text = text.as_str();
44
45 while let Some(index) = text.find(FENCE) {
46 // Advance text past the opening backticks.
47 text = &text[index + FENCE.len()..];
48
49 // Find the closing backticks.
50 let content_len = text.find(&closing_fence);
51
52 // Verify the citation format - e.g. ```path/to/foo.txt#L123-456
53 if let Some(citation_len) = text.find('\n') {
54 let citation = &text[..citation_len];
55
56 if let Ok(()) =
57 cx.assert(citation.contains("/"), format!("Slash in {citation:?}",))
58 {
59 let path_range = PathWithRange::new(citation);
60 let path = cx
61 .agent_thread()
62 .update(cx, |thread, cx| {
63 thread
64 .project()
65 .read(cx)
66 .find_project_path(path_range.path, cx)
67 })
68 .ok()
69 .flatten();
70
71 if let Ok(path) = cx.assert_some(path, format!("Valid path: {citation:?}"))
72 {
73 let buffer_text = {
74 let buffer = match cx.agent_thread().update(cx, |thread, cx| {
75 thread
76 .project()
77 .update(cx, |project, cx| project.open_buffer(path, cx))
78 }) {
79 Ok(buffer_task) => buffer_task.await.ok(),
80 Err(err) => {
81 cx.assert(
82 false,
83 format!("Expected Ok(buffer), not {err:?}"),
84 )
85 .ok();
86 break;
87 }
88 };
89
90 let Ok(buffer_text) = cx.assert_some(
91 buffer.and_then(|buffer| {
92 buffer.read_with(cx, |buffer, _| buffer.text()).ok()
93 }),
94 "Reading buffer text succeeded",
95 ) else {
96 continue;
97 };
98 buffer_text
99 };
100
101 if let Some(content_len) = content_len {
102 // + 1 because there's a newline character after the citation.
103 let start_index = citation.len() + 1;
104 let end_index = content_len.saturating_sub(start_index);
105
106 if cx
107 .assert(
108 start_index <= end_index,
109 "Code block had a valid citation",
110 )
111 .is_ok()
112 {
113 let content = &text[start_index..end_index];
114
115 // deindent (trim the start of each line) because sometimes the model
116 // chooses to deindent its code snippets for the sake of readability,
117 // which in markdown is not only reasonable but usually desirable.
118 cx.assert(
119 deindent(&buffer_text)
120 .trim()
121 .contains(deindent(&content).trim()),
122 "Code block content was found in file",
123 )
124 .ok();
125
126 if let Some(range) = path_range.range {
127 let start_line_index = range.start.line.saturating_sub(1);
128 let line_count =
129 range.end.line.saturating_sub(start_line_index);
130 let mut snippet = buffer_text
131 .lines()
132 .skip(start_line_index as usize)
133 .take(line_count as usize)
134 .collect::<Vec<&str>>()
135 .join("\n");
136
137 if let Some(start_col) = range.start.col {
138 snippet = snippet[start_col as usize..].to_string();
139 }
140
141 if let Some(end_col) = range.end.col {
142 let last_line = snippet.lines().last().unwrap();
143 snippet = snippet[..snippet.len() - last_line.len()
144 + end_col as usize]
145 .to_string();
146 }
147
148 // deindent (trim the start of each line) because sometimes the model
149 // chooses to deindent its code snippets for the sake of readability,
150 // which in markdown is not only reasonable but usually desirable.
151 cx.assert_eq(
152 deindent(snippet.as_str()).trim(),
153 deindent(content).trim(),
154 format!(
155 "Code block was at {:?}-{:?}",
156 range.start, range.end
157 ),
158 )
159 .ok();
160 }
161 }
162 }
163 }
164 }
165 } else {
166 cx.assert(
167 false,
168 format!("Opening {FENCE} did not have a newline anywhere after it."),
169 )
170 .ok();
171 }
172
173 if let Some(content_len) = content_len {
174 // Advance past the closing backticks
175 text = &text[content_len + FENCE.len()..];
176 } else {
177 // There were no closing backticks associated with these opening backticks.
178 cx.assert(
179 false,
180 "Code block opening had matching closing backticks.".to_string(),
181 )
182 .ok();
183
184 // There are no more code blocks to parse, so we're done.
185 break;
186 }
187 }
188 }
189
190 Ok(())
191 }
192
193 fn thread_assertions(&self) -> Vec<JudgeAssertion> {
194 vec![
195 JudgeAssertion {
196 id: "trait method bodies are shown".to_string(),
197 description:
198 "All method bodies of the Tool trait are shown."
199 .to_string(),
200 },
201 JudgeAssertion {
202 id: "code blocks used".to_string(),
203 description:
204 "All code snippets are rendered inside markdown code blocks (as opposed to any other formatting besides code blocks)."
205 .to_string(),
206 },
207 JudgeAssertion {
208 id: "code blocks use backticks".to_string(),
209 description:
210 format!("All markdown code blocks use backtick fences ({FENCE}) rather than indentation.")
211 }
212 ]
213 }
214}
215
/// Returns a copy of the input with the leading whitespace of every line removed.
///
/// The input is split with `str::lines`, so line terminators are normalised to
/// `\n` and a trailing newline is not reproduced in the output.
fn deindent(as_str: impl AsRef<str>) -> String {
    let source = as_str.as_ref();
    let mut stripped = String::with_capacity(source.len());

    for (row, line) in source.lines().enumerate() {
        // Separator goes between lines only, never after the last one.
        if row > 0 {
            stripped.push('\n');
        }
        stripped.push_str(line.trim_start());
    }

    stripped
}