Created by Ben Kunkle and Agus
Closes #ISSUE
Improves error reporting for various failure modes of zeta2, including
failing to parse the `<old_text>`/`<new_text>` pattern and the contents
of `<old_text>` failing to match.

Additionally, evals are now checked out into a worktree named after the
_repo_ rather than the _example_, so that the eval name has no influence
on the model's prediction. The repo-named worktrees are still namespaced
by the example name, as `{example_name}/{repo_name}`, so that evals
pointing to the same repo do not conflict.
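For illustration, a minimal sketch of how such a path could be assembled (the directory names and the `worktree_path` helper here are hypothetical, not the actual `zeta_cli` constants):

```rust
use std::path::{Path, PathBuf};

/// Sketch: namespace by example name, but make the final directory the
/// repo name, since that is the only name the model ends up seeing.
fn worktree_path(worktrees_dir: &Path, example_name: &str, repo_name: &str) -> PathBuf {
    worktrees_dir.join(example_name).join(repo_name)
}

fn main() {
    let path = worktree_path(Path::new("worktrees"), "rename-symbol-eval", "zed");
    assert_eq!(path, Path::new("worktrees/rename-symbol-eval/zed"));
}
```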
Release Notes:
- N/A *or* Added/Fixed/Improved ...
---------
Co-authored-by: Agus <agus@zed.dev>
Cargo.lock | 1
crates/zeta2/Cargo.toml | 3 +
crates/zeta2/src/retrieval_search.rs | 42 ++++++++++++----------
crates/zeta2/src/xml_edits.rs | 56 ++++++++++++++++++++++++++++-
crates/zeta_cli/src/example.rs | 16 +++++---
5 files changed, 89 insertions(+), 29 deletions(-)
Cargo.lock
@@ -21719,6 +21719,7 @@ dependencies = [
"serde_json",
"settings",
"smol",
+ "strsim",
"thiserror 2.0.17",
"util",
"uuid",
crates/zeta2/Cargo.toml
@@ -37,11 +37,13 @@ release_channel.workspace = true
serde.workspace = true
serde_json.workspace = true
smol.workspace = true
+strsim.workspace = true
thiserror.workspace = true
util.workspace = true
uuid.workspace = true
workspace.workspace = true
worktree.workspace = true
+pretty_assertions.workspace = true

[dev-dependencies]
clock = { workspace = true, features = ["test-support"] }
@@ -51,7 +53,6 @@ lsp.workspace = true
indoc.workspace = true
language = { workspace = true, features = ["test-support"] }
language_model = { workspace = true, features = ["test-support"] }
-pretty_assertions.workspace = true
project = { workspace = true, features = ["test-support"] }
settings = { workspace = true, features = ["test-support"] }
zlog.workspace = true
crates/zeta2/src/retrieval_search.rs
@@ -81,25 +81,7 @@ pub async fn run_retrieval_searches(
for (buffer, ranges) in results.iter_mut() {
if let Some(snapshot) = snapshots.get(&buffer.entity_id()) {
- ranges.sort_unstable_by(|a, b| {
- a.start
- .cmp(&b.start, snapshot)
- .then(b.end.cmp(&b.end, snapshot))
- });
-
- let mut index = 1;
- while index < ranges.len() {
- if ranges[index - 1]
- .end
- .cmp(&ranges[index].start, snapshot)
- .is_gt()
- {
- let removed = ranges.remove(index);
- ranges[index - 1].end = removed.end;
- } else {
- index += 1;
- }
- }
+ merge_anchor_ranges(ranges, snapshot);
}
}
@@ -108,6 +90,28 @@ pub async fn run_retrieval_searches(
.await
}
+fn merge_anchor_ranges(ranges: &mut Vec<Range<Anchor>>, snapshot: &BufferSnapshot) {
+ ranges.sort_unstable_by(|a, b| {
+ a.start
+ .cmp(&b.start, snapshot)
+ .then(b.end.cmp(&b.end, snapshot))
+ });
+
+ let mut index = 1;
+ while index < ranges.len() {
+ if ranges[index - 1]
+ .end
+ .cmp(&ranges[index].start, snapshot)
+ .is_ge()
+ {
+ let removed = ranges.remove(index);
+ ranges[index - 1].end = removed.end;
+ } else {
+ index += 1;
+ }
+ }
+}
+
const MAX_EXCERPT_LEN: usize = 768;
const MAX_RESULTS_LEN: usize = MAX_EXCERPT_LEN * 5;
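The extracted `merge_anchor_ranges` helper above is a standard merge of sorted, overlapping intervals, with the comparison loosened from `is_gt` to `is_ge` so that touching ranges are coalesced too. A self-contained sketch of the same idea on plain `usize` ranges (this version also takes the `max` of the two ends, which guards against a fully contained range shrinking the merged result):

```rust
use std::ops::Range;

/// Merge overlapping or touching ranges in place.
fn merge_ranges(ranges: &mut Vec<Range<usize>>) {
    ranges.sort_unstable_by_key(|range| range.start);
    let mut index = 1;
    while index < ranges.len() {
        if ranges[index - 1].end >= ranges[index].start {
            // Fold the current range into the previous one.
            let removed = ranges.remove(index);
            ranges[index - 1].end = ranges[index - 1].end.max(removed.end);
        } else {
            index += 1;
        }
    }
}

fn main() {
    let mut ranges = vec![5..9, 0..3, 3..6, 20..25];
    merge_ranges(&mut ranges);
    assert_eq!(ranges, vec![0..9, 20..25]);
}
```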
crates/zeta2/src/xml_edits.rs
@@ -5,6 +5,15 @@ use std::path::Path;
use std::sync::Arc;
pub async fn parse_xml_edits<'a>(
+ input: &'a str,
+ get_buffer: impl Fn(&Path) -> Option<(&'a BufferSnapshot, &'a [Range<Anchor>])> + Send,
+) -> Result<(&'a BufferSnapshot, Vec<(Range<Anchor>, Arc<str>)>)> {
+ parse_xml_edits_inner(input, get_buffer)
+ .await
+ .with_context(|| format!("Failed to parse XML edits:\n{input}"))
+}
+
+async fn parse_xml_edits_inner<'a>(
mut input: &'a str,
get_buffer: impl Fn(&Path) -> Option<(&'a BufferSnapshot, &'a [Range<Anchor>])> + Send,
) -> Result<(&'a BufferSnapshot, Vec<(Range<Anchor>, Arc<str>)>)> {
@@ -56,13 +65,29 @@ fn resolve_new_text_old_text_in_buffer(
let range = range.to_offset(buffer);
let text = buffer.text_for_range(range.clone()).collect::<String>();
for (match_offset, _) in text.match_indices(old_text) {
- if offset.is_some() {
- anyhow::bail!("old_text is not unique enough:\n{}", old_text);
+ if let Some(offset) = offset {
+ let offset_match_point = buffer.offset_to_point(offset);
+ let second_match_point = buffer.offset_to_point(range.start + match_offset);
+ anyhow::bail!(
+ "old_text is not unique enough:\n{}\nFound at {:?} and {:?}",
+ old_text,
+ offset_match_point,
+ second_match_point
+ );
}
offset = Some(range.start + match_offset);
}
}
- offset.ok_or_else(|| anyhow!("Failed to match old_text:\n{}", old_text))
+ offset.ok_or_else(|| {
+ #[cfg(debug_assertions)]
+ if let Some(closest_match) = closest_old_text_match(buffer, old_text) {
+ log::info!(
+ "Closest `old_text` match: {}",
+ pretty_assertions::StrComparison::new(old_text, &closest_match)
+ )
+ }
+ anyhow!("Failed to match old_text:\n{}", old_text)
+ })
}?;
let edits_within_hunk = language::text_diff(&old_text, &new_text);
@@ -77,6 +102,31 @@ fn resolve_new_text_old_text_in_buffer(
}))
}
+#[cfg(debug_assertions)]
+fn closest_old_text_match(buffer: &TextBufferSnapshot, old_text: &str) -> Option<String> {
+ let buffer_text = buffer.text();
+ let mut cursor = 0;
+ let len = old_text.len();
+
+ let mut min_score = usize::MAX;
+ let mut min_start = 0;
+
+ while cursor + len <= buffer_text.len() {
+ let candidate = &buffer_text[cursor..cursor + len];
+ let score = strsim::levenshtein(candidate, old_text);
+ if score < min_score {
+ min_score = score;
+ min_start = cursor;
+ }
+ cursor += 1;
+ }
+ if min_score != usize::MAX {
+ Some(buffer_text[min_start..min_start + len].to_string())
+ } else {
+ None
+ }
+}
+
struct ParsedTag<'a> {
attributes: &'a str,
body: &'a str,
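The debug-only `closest_old_text_match` above slides a window of `old_text.len()` bytes across the buffer and keeps the candidate with the lowest Levenshtein distance (the reason for the new `strsim` dependency), then logs a `pretty_assertions` diff against it. As a rough sketch of the same sliding-window idea, here is a variant that steps on char boundaries so that slicing cannot panic on multi-byte UTF-8 text (the byte-by-byte version above assumes such offsets are valid); `strsim::levenshtein` is the same function used in the diff:

```rust
/// Find the window of `haystack` (same byte length as `needle`) with the
/// smallest Levenshtein distance to `needle`. Returns None for an empty
/// haystack or when `needle` is longer than `haystack`.
fn closest_match<'a>(haystack: &'a str, needle: &str) -> Option<&'a str> {
    let len = needle.len();
    let mut best: Option<(usize, &'a str)> = None;
    for (start, _) in haystack.char_indices() {
        let end = start + len;
        if end > haystack.len() {
            break;
        }
        // `get` returns None when `end` is not a char boundary.
        let Some(candidate) = haystack.get(start..end) else {
            continue;
        };
        let score = strsim::levenshtein(candidate, needle);
        if best.map_or(true, |(best_score, _)| score < best_score) {
            best = Some((score, candidate));
        }
    }
    best.map(|(_, candidate)| candidate)
}

fn main() {
    let buffer = "ranges.sort_unstable_by_key(|r| r.start);";
    let wanted = "sort_unstable_by_kay"; // one character off
    assert_eq!(closest_match(buffer, wanted), Some("sort_unstable_by_key"));
}
```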
crates/zeta_cli/src/example.rs
@@ -315,9 +315,6 @@ impl NamedExample {
let (repo_owner, repo_name) = self.repo_name()?;
let file_name = self.file_name();
- fs::create_dir_all(&*REPOS_DIR)?;
- fs::create_dir_all(&*WORKTREES_DIR)?;
-
let repo_dir = REPOS_DIR.join(repo_owner.as_ref()).join(repo_name.as_ref());
let repo_lock = lock_repo(&repo_dir).await;
@@ -332,7 +329,14 @@ impl NamedExample {
}
// Resolve the example to a revision, fetching it if needed.
- let revision = run_git(&repo_dir, &["rev-parse", &self.example.revision]).await;
+ let revision = run_git(
+ &repo_dir,
+ &[
+ "rev-parse",
+ &format!("{}^{{commit}}", self.example.revision),
+ ],
+ )
+ .await;
let revision = if let Ok(revision) = revision {
revision
} else {
@@ -349,7 +353,7 @@ impl NamedExample {
};
// Create the worktree for this example if needed.
- let worktree_path = WORKTREES_DIR.join(&file_name);
+ let worktree_path = WORKTREES_DIR.join(&file_name).join(repo_name.as_ref());
if worktree_path.is_dir() {
run_git(&worktree_path, &["clean", "--force", "-d"]).await?;
run_git(&worktree_path, &["reset", "--hard", "HEAD"]).await?;
@@ -477,7 +481,7 @@ impl NamedExample {
let mut matches = text.match_indices(&cursor_excerpt);
let Some((excerpt_offset, _)) = matches.next() else {
anyhow::bail!(
- "Cursor excerpt did not exist in buffer.\nExcerpt:\n\n{cursor_excerpt}\nBuffer text:\n{text}\n"
+ "\nExcerpt:\n\n{cursor_excerpt}\nBuffer text:\n{text}\n.Cursor excerpt did not exist in buffer."
);
};
assert!(matches.next().is_none());
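The `rev-parse` change above appends git's `^{commit}` peel syntax, which forces the revision to resolve to an existing commit object; plain `git rev-parse` will happily echo back any well-formed full SHA even when the object is not present locally, so without the suffix an unfetched revision could slip past the fetch fallback. A minimal standalone sketch of the same check (using `std::process::Command` directly instead of the crate's `run_git` helper; the `resolve_commit` name is made up for this example):

```rust
use std::process::Command;

/// Resolve `revision` to a commit SHA inside `repo_dir`. Peeling with
/// ^{commit} makes git verify the object exists and is a commit, so a
/// well-formed but unfetched hash is reported as an error.
fn resolve_commit(repo_dir: &str, revision: &str) -> Result<String, String> {
    let output = Command::new("git")
        .args(["-C", repo_dir, "rev-parse"])
        .arg(format!("{revision}^{{commit}}"))
        .output()
        .map_err(|err| err.to_string())?;
    if output.status.success() {
        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
    } else {
        Err(String::from_utf8_lossy(&output.stderr).into_owned())
    }
}

fn main() {
    // On Err, a caller like zeta_cli would fetch the revision and retry.
    match resolve_commit(".", "HEAD") {
        Ok(sha) => println!("resolved to {sha}"),
        Err(err) => eprintln!("not resolvable yet: {err}"),
    }
}
```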