1use agent_settings::AgentProfileId;
2use anyhow::Result;
3use async_trait::async_trait;
4use markdown::PathWithRange;
5
6use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion, LanguageServer};
7
/// Eval example that checks the code blocks in the agent's responses: each
/// fenced block is expected to open with a `path#Lstart-end` style citation
/// that resolves to a file in the project and matches that file's contents.
pub struct CodeBlockCitations;
9
/// The markdown fence (three backticks) that opens and closes a code block.
const FENCE: &str = "```";
11
12#[async_trait(?Send)]
13impl Example for CodeBlockCitations {
14 fn meta(&self) -> ExampleMetadata {
15 ExampleMetadata {
16 name: "code_block_citations".to_string(),
17 url: "https://github.com/zed-industries/zed.git".to_string(),
18 revision: "f69aeb6311dde3c0b8979c293d019d66498d54f2".to_string(),
19 language_server: Some(LanguageServer {
20 file_extension: "rs".to_string(),
21 allow_preexisting_diagnostics: false,
22 }),
23 max_assertions: None,
24 profile_id: AgentProfileId::default(),
25 existing_thread_json: None,
26 max_turns: None,
27 }
28 }
29
30 async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
31 const FILENAME: &str = "assistant_tool.rs";
32
33 // Verify that the messages all have the correct formatting.
34 let texts: Vec<String> = cx
35 .prompt(format!(
36 r#"
37 Show me the method bodies of all the methods of the `Tool` trait in {FILENAME}.
38
39 Please show each method in a separate code snippet.
40 "#
41 ))
42 .await?
43 .texts()
44 .collect();
45 let closing_fence = format!("\n{FENCE}");
46
47 for text in texts.iter() {
48 let mut text = text.as_str();
49
50 while let Some(index) = text.find(FENCE) {
51 // Advance text past the opening backticks.
52 text = &text[index + FENCE.len()..];
53
54 // Find the closing backticks.
55 let content_len = text.find(&closing_fence);
56
57 // Verify the citation format - e.g. ```path/to/foo.txt#L123-456
58 if let Some(citation_len) = text.find('\n') {
59 let citation = &text[..citation_len];
60
61 if let Ok(()) =
62 cx.assert(citation.contains("/"), format!("Slash in {citation:?}",))
63 {
64 let path_range = PathWithRange::new(citation);
65 let path = cx
66 .agent_thread()
67 .update(cx, |thread, cx| {
68 thread
69 .project()
70 .read(cx)
71 .find_project_path(path_range.path.as_ref(), cx)
72 })
73 .ok()
74 .flatten();
75
76 if let Ok(path) = cx.assert_some(path, format!("Valid path: {citation:?}"))
77 {
78 let buffer_text = {
79 let buffer = match cx.agent_thread().update(cx, |thread, cx| {
80 thread
81 .project()
82 .update(cx, |project, cx| project.open_buffer(path, cx))
83 }) {
84 Ok(buffer_task) => buffer_task.await.ok(),
85 Err(err) => {
86 cx.assert(
87 false,
88 format!("Expected Ok(buffer), not {err:?}"),
89 )
90 .ok();
91 break;
92 }
93 };
94
95 let Ok(buffer_text) = cx.assert_some(
96 buffer.and_then(|buffer| {
97 buffer.read_with(cx, |buffer, _| buffer.text()).ok()
98 }),
99 "Reading buffer text succeeded",
100 ) else {
101 continue;
102 };
103 buffer_text
104 };
105
106 if let Some(content_len) = content_len {
107 // + 1 because there's a newline character after the citation.
108 let start_index = citation.len() + 1;
109 let end_index = content_len.saturating_sub(start_index);
110
111 if cx
112 .assert(
113 start_index <= end_index,
114 "Code block had a valid citation",
115 )
116 .is_ok()
117 {
118 let content = &text[start_index..end_index];
119
120 // deindent (trim the start of each line) because sometimes the model
121 // chooses to deindent its code snippets for the sake of readability,
122 // which in markdown is not only reasonable but usually desirable.
123 cx.assert(
124 deindent(&buffer_text)
125 .trim()
126 .contains(deindent(&content).trim()),
127 "Code block content was found in file",
128 )
129 .ok();
130
131 if let Some(range) = path_range.range {
132 let start_line_index = range.start.line.saturating_sub(1);
133 let line_count =
134 range.end.line.saturating_sub(start_line_index);
135 let mut snippet = buffer_text
136 .lines()
137 .skip(start_line_index as usize)
138 .take(line_count as usize)
139 .collect::<Vec<&str>>()
140 .join("\n");
141
142 if let Some(start_col) = range.start.col {
143 snippet = snippet[start_col as usize..].to_string();
144 }
145
146 if let Some(end_col) = range.end.col {
147 let last_line = snippet.lines().last().unwrap();
148 snippet = snippet[..snippet.len() - last_line.len()
149 + end_col as usize]
150 .to_string();
151 }
152
153 // deindent (trim the start of each line) because sometimes the model
154 // chooses to deindent its code snippets for the sake of readability,
155 // which in markdown is not only reasonable but usually desirable.
156 cx.assert_eq(
157 deindent(snippet.as_str()).trim(),
158 deindent(content).trim(),
159 format!(
160 "Code block was at {:?}-{:?}",
161 range.start, range.end
162 ),
163 )
164 .ok();
165 }
166 }
167 }
168 }
169 }
170 } else {
171 cx.assert(
172 false,
173 format!("Opening {FENCE} did not have a newline anywhere after it."),
174 )
175 .ok();
176 }
177
178 if let Some(content_len) = content_len {
179 // Advance past the closing backticks
180 text = &text[content_len + FENCE.len()..];
181 } else {
182 // There were no closing backticks associated with these opening backticks.
183 cx.assert(
184 false,
185 "Code block opening had matching closing backticks.".to_string(),
186 )
187 .ok();
188
189 // There are no more code blocks to parse, so we're done.
190 break;
191 }
192 }
193 }
194
195 Ok(())
196 }
197
198 fn thread_assertions(&self) -> Vec<JudgeAssertion> {
199 vec![
200 JudgeAssertion {
201 id: "trait method bodies are shown".to_string(),
202 description:
203 "All method bodies of the Tool trait are shown."
204 .to_string(),
205 },
206 JudgeAssertion {
207 id: "code blocks used".to_string(),
208 description:
209 "All code snippets are rendered inside markdown code blocks (as opposed to any other formatting besides code blocks)."
210 .to_string(),
211 },
212 JudgeAssertion {
213 id: "code blocks use backticks".to_string(),
214 description:
215 format!("All markdown code blocks use backtick fences ({FENCE}) rather than indentation.")
216 }
217 ]
218 }
219}
220
/// Removes the leading whitespace from every line of `as_str`.
///
/// Lines are split with [`str::lines`], so `\n` and `\r\n` terminators are both
/// accepted, and the result joins the trimmed lines with a single `\n`. A
/// trailing line terminator in the input is therefore not preserved. Trailing
/// whitespace within a line is left untouched.
fn deindent(as_str: impl AsRef<str>) -> String {
    let text = as_str.as_ref();
    // The result can never be longer than the input, so one allocation suffices.
    let mut deindented = String::with_capacity(text.len());

    for (index, line) in text.lines().enumerate() {
        if index > 0 {
            deindented.push('\n');
        }
        deindented.push_str(line.trim_start());
    }

    deindented
}