code_block_citations.rs

  1use anyhow::Result;
  2use async_trait::async_trait;
  3use markdown::PathWithRange;
  4
  5use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion, LanguageServer};
  6
/// Eval example that asks the agent for the method bodies of the `Tool` trait and then checks
/// the fenced code blocks in the reply: each block's citation line, the cited path, and the
/// quoted content.
pub struct CodeBlockCitations;
  8
/// Markdown code fence marker. It is searched for in the agent's replies to locate the start of
/// code blocks, and (preceded by a newline) to locate their end.
const FENCE: &str = "```";
 10
 11#[async_trait(?Send)]
 12impl Example for CodeBlockCitations {
 13    fn meta(&self) -> ExampleMetadata {
 14        ExampleMetadata {
 15            name: "code_block_citations".to_string(),
 16            url: "https://github.com/zed-industries/zed.git".to_string(),
 17            revision: "f69aeb6311dde3c0b8979c293d019d66498d54f2".to_string(),
 18            language_server: Some(LanguageServer {
 19                file_extension: "rs".to_string(),
 20                allow_preexisting_diagnostics: false,
 21            }),
 22            max_assertions: None,
 23        }
 24    }
 25
 26    async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
 27        const FILENAME: &str = "assistant_tool.rs";
 28        cx.push_user_message(format!(
 29            r#"
 30            Show me the method bodies of all the methods of the `Tool` trait in {FILENAME}.
 31
 32            Please show each method in a separate code snippet.
 33            "#
 34        ));
 35
 36        // Verify that the messages all have the correct formatting.
 37        let texts: Vec<String> = cx.run_to_end().await?.texts().collect();
 38        let closing_fence = format!("\n{FENCE}");
 39
 40        for text in texts.iter() {
 41            let mut text = text.as_str();
 42
 43            while let Some(index) = text.find(FENCE) {
 44                // Advance text past the opening backticks.
 45                text = &text[index + FENCE.len()..];
 46
 47                // Find the closing backticks.
 48                let content_len = text.find(&closing_fence);
 49
 50                // Verify the citation format - e.g. ```path/to/foo.txt#L123-456
 51                if let Some(citation_len) = text.find('\n') {
 52                    let citation = &text[..citation_len];
 53
 54                    if let Ok(()) =
 55                        cx.assert(citation.contains("/"), format!("Slash in {citation:?}",))
 56                    {
 57                        let path_range = PathWithRange::new(citation);
 58                        let path = cx
 59                            .agent_thread()
 60                            .update(cx, |thread, cx| {
 61                                thread
 62                                    .project()
 63                                    .read(cx)
 64                                    .find_project_path(path_range.path, cx)
 65                            })
 66                            .ok()
 67                            .flatten();
 68
 69                        if let Ok(path) = cx.assert_some(path, format!("Valid path: {citation:?}"))
 70                        {
 71                            let buffer_text = {
 72                                let buffer = match cx.agent_thread().update(cx, |thread, cx| {
 73                                    thread
 74                                        .project()
 75                                        .update(cx, |project, cx| project.open_buffer(path, cx))
 76                                }) {
 77                                    Ok(buffer_task) => buffer_task.await.ok(),
 78                                    Err(err) => {
 79                                        cx.assert(
 80                                            false,
 81                                            format!("Expected Ok(buffer), not {err:?}"),
 82                                        )
 83                                        .ok();
 84                                        break;
 85                                    }
 86                                };
 87
 88                                let Ok(buffer_text) = cx.assert_some(
 89                                    buffer.and_then(|buffer| {
 90                                        buffer.read_with(cx, |buffer, _| buffer.text()).ok()
 91                                    }),
 92                                    "Reading buffer text succeeded",
 93                                ) else {
 94                                    continue;
 95                                };
 96                                buffer_text
 97                            };
 98
 99                            if let Some(content_len) = content_len {
100                                // + 1 because there's a newline character after the citation.
101                                let start_index = citation.len() + 1;
102                                let end_index = content_len.saturating_sub(start_index);
103
104                                if cx
105                                    .assert(
106                                        start_index <= end_index,
107                                        "Code block had a valid citation",
108                                    )
109                                    .is_ok()
110                                {
111                                    let content = &text[start_index..end_index];
112
113                                    // deindent (trim the start of each line) because sometimes the model
114                                    // chooses to deindent its code snippets for the sake of readability,
115                                    // which in markdown is not only reasonable but usually desirable.
116                                    cx.assert(
117                                        deindent(&buffer_text)
118                                            .trim()
119                                            .contains(deindent(&content).trim()),
120                                        "Code block content was found in file",
121                                    )
122                                    .ok();
123
124                                    if let Some(range) = path_range.range {
125                                        let start_line_index = range.start.line.saturating_sub(1);
126                                        let line_count =
127                                            range.end.line.saturating_sub(start_line_index);
128                                        let mut snippet = buffer_text
129                                            .lines()
130                                            .skip(start_line_index as usize)
131                                            .take(line_count as usize)
132                                            .collect::<Vec<&str>>()
133                                            .join("\n");
134
135                                        if let Some(start_col) = range.start.col {
136                                            snippet = snippet[start_col as usize..].to_string();
137                                        }
138
139                                        if let Some(end_col) = range.end.col {
140                                            let last_line = snippet.lines().last().unwrap();
141                                            snippet = snippet[..snippet.len() - last_line.len()
142                                                + end_col as usize]
143                                                .to_string();
144                                        }
145
146                                        // deindent (trim the start of each line) because sometimes the model
147                                        // chooses to deindent its code snippets for the sake of readability,
148                                        // which in markdown is not only reasonable but usually desirable.
149                                        cx.assert_eq(
150                                            deindent(snippet.as_str()).trim(),
151                                            deindent(content).trim(),
152                                            format!(
153                                                "Code block was at {:?}-{:?}",
154                                                range.start, range.end
155                                            ),
156                                        )
157                                        .ok();
158                                    }
159                                }
160                            }
161                        }
162                    }
163                } else {
164                    cx.assert(
165                        false,
166                        format!("Opening {FENCE} did not have a newline anywhere after it."),
167                    )
168                    .ok();
169                }
170
171                if let Some(content_len) = content_len {
172                    // Advance past the closing backticks
173                    text = &text[content_len + FENCE.len()..];
174                } else {
175                    // There were no closing backticks associated with these opening backticks.
176                    cx.assert(
177                        false,
178                        "Code block opening had matching closing backticks.".to_string(),
179                    )
180                    .ok();
181
182                    // There are no more code blocks to parse, so we're done.
183                    break;
184                }
185            }
186        }
187
188        Ok(())
189    }
190
191    fn thread_assertions(&self) -> Vec<JudgeAssertion> {
192        vec![
193            JudgeAssertion {
194                id: "trait method bodies are shown".to_string(),
195                description:
196                    "All method bodies of the Tool trait are shown."
197                        .to_string(),
198            },
199            JudgeAssertion {
200                id: "code blocks used".to_string(),
201                description:
202                   "All code snippets are rendered inside markdown code blocks (as opposed to any other formatting besides code blocks)."
203                        .to_string(),
204            },
205            JudgeAssertion {
206              id: "code blocks use backticks".to_string(),
207              description:
208                  format!("All markdown code blocks use backtick fences ({FENCE}) rather than indentation.")
209            }
210        ]
211    }
212}
213
/// Returns a copy of the input with the leading whitespace of every line removed.
///
/// Lines are split with `str::lines` and re-joined with a single `\n`, so a trailing line
/// terminator is not kept in the result.
fn deindent(as_str: impl AsRef<str>) -> String {
    let source = as_str.as_ref();
    let mut flattened = String::with_capacity(source.len());

    for (position, line) in source.lines().enumerate() {
        // Separator goes between lines only, never before the first one.
        if position > 0 {
            flattened.push('\n');
        }
        flattened.push_str(line.trim_start());
    }

    flattened
}