diff --git a/crates/cloud_llm_client/src/predict_edits_v3.rs b/crates/cloud_llm_client/src/predict_edits_v3.rs
index 2e884ae9fcb27530e5579b83767bde95b5df414c..98ca0748934d663d204c64544af8a3e83fcd704d 100644
--- a/crates/cloud_llm_client/src/predict_edits_v3.rs
+++ b/crates/cloud_llm_client/src/predict_edits_v3.rs
@@ -73,6 +73,7 @@ pub enum PromptFormat {
MarkedExcerpt,
LabeledSections,
NumLinesUniDiff,
+ OldTextNewText,
/// Prompt format intended for use via zeta_cli
OnlySnippets,
}
@@ -100,6 +101,7 @@ impl std::fmt::Display for PromptFormat {
PromptFormat::LabeledSections => write!(f, "Labeled Sections"),
PromptFormat::OnlySnippets => write!(f, "Only Snippets"),
PromptFormat::NumLinesUniDiff => write!(f, "Numbered Lines / Unified Diff"),
+ PromptFormat::OldTextNewText => write!(f, "Old Text / New Text"),
}
}
}
diff --git a/crates/cloud_zeta2_prompt/src/cloud_zeta2_prompt.rs b/crates/cloud_zeta2_prompt/src/cloud_zeta2_prompt.rs
index 6055c39e16ea95b38754bb26fd7371250d1fc525..3f0bd476c50b9e6f92a9f457af15899fcb33b8ed 100644
--- a/crates/cloud_zeta2_prompt/src/cloud_zeta2_prompt.rs
+++ b/crates/cloud_zeta2_prompt/src/cloud_zeta2_prompt.rs
@@ -100,6 +100,54 @@ const UNIFIED_DIFF_REMINDER: &str = indoc! {"
to uniquely identify it amongst all excerpts of code provided.
"};
+const XML_TAGS_INSTRUCTIONS: &str = indoc! {r#"
+ # Instructions
+
+ You are an edit prediction agent in a code editor.
+ Your job is to predict the next edit that the user will make,
+ based on their last few edits and their current cursor location.
+
+ # Output Format
+
+ You must briefly explain your understanding of the user's goal, in one
+ or two sentences, and then specify their next edit, using the following
+ XML format:
+
+
+
+ OLD TEXT 1 HERE
+
+
+ NEW TEXT 1 HERE
+
+
+
+ OLD TEXT 1 HERE
+
+
+ NEW TEXT 1 HERE
+
+
+
+ - Specify the file to edit using the `path` attribute.
+ - Use `` and `` tags to replace content
+ - `` must exactly match existing file content, including indentation
+ - `` cannot be empty
+ - Do not escape quotes, newlines, or other characters within tags
+ - Always close all tags properly
+ - Don't include the <|user_cursor|> marker in your output.
+
+ # Edit History:
+
+"#};
+
+const OLD_TEXT_NEW_TEXT_REMINDER: &str = indoc! {r#"
+ ---
+
+ Remember that the edits in the edit history have already been deployed.
+ The files are currently as shown in the Code Excerpts section.
+"#};
+
pub fn build_prompt(
request: &predict_edits_v3::PredictEditsRequest,
) -> Result<(String, SectionLabels)> {
@@ -121,7 +169,9 @@ pub fn build_prompt(
EDITABLE_REGION_END_MARKER_WITH_NEWLINE,
),
],
- PromptFormat::LabeledSections | PromptFormat::NumLinesUniDiff => {
+ PromptFormat::LabeledSections
+ | PromptFormat::NumLinesUniDiff
+ | PromptFormat::OldTextNewText => {
vec![(request.cursor_point, CURSOR_MARKER)]
}
PromptFormat::OnlySnippets => vec![],
@@ -131,6 +181,7 @@ pub fn build_prompt(
PromptFormat::MarkedExcerpt => MARKED_EXCERPT_INSTRUCTIONS.to_string(),
PromptFormat::LabeledSections => LABELED_SECTIONS_INSTRUCTIONS.to_string(),
PromptFormat::NumLinesUniDiff => NUMBERED_LINES_INSTRUCTIONS.to_string(),
+ PromptFormat::OldTextNewText => XML_TAGS_INSTRUCTIONS.to_string(),
PromptFormat::OnlySnippets => String::new(),
};
@@ -186,6 +237,9 @@ pub fn build_prompt(
PromptFormat::NumLinesUniDiff => {
prompt.push_str(UNIFIED_DIFF_REMINDER);
}
+ PromptFormat::OldTextNewText => {
+ prompt.push_str(OLD_TEXT_NEW_TEXT_REMINDER);
+ }
_ => {}
}
@@ -611,6 +665,7 @@ impl<'a> SyntaxBasedPrompt<'a> {
match self.request.prompt_format {
PromptFormat::MarkedExcerpt
| PromptFormat::OnlySnippets
+ | PromptFormat::OldTextNewText
| PromptFormat::NumLinesUniDiff => {
if range.start.0 > 0 && !skipped_last_snippet {
output.push_str("…\n");
diff --git a/crates/zeta2/src/xml_edits.rs b/crates/zeta2/src/xml_edits.rs
new file mode 100644
index 0000000000000000000000000000000000000000..e8bcc4b1ba7eb2d00cd73b0b2e8d1638a5b00e32
--- /dev/null
+++ b/crates/zeta2/src/xml_edits.rs
@@ -0,0 +1,197 @@
+use anyhow::{Context as _, Result, anyhow};
+use language::{Anchor, BufferSnapshot, OffsetRangeExt as _, TextBufferSnapshot};
+use std::ops::Range;
+use std::path::Path;
+use std::sync::Arc;
+
+pub async fn parse_xml_edits<'a>(
+ mut input: &'a str,
+ get_buffer: impl Fn(&Path) -> Option<(&'a BufferSnapshot, &'a [Range])> + Send,
+) -> Result<(&'a BufferSnapshot, Vec<(Range, Arc)>)> {
+ let edits_tag = parse_tag(&mut input, "edits")?.context("No edits tag")?;
+
+ input = edits_tag.body;
+
+ let file_path = edits_tag
+ .attributes
+ .trim_start()
+ .strip_prefix("path")
+ .context("no file attribute on edits tag")?
+ .trim_end()
+ .strip_prefix('=')
+ .context("no value for path attribute")?
+ .trim()
+ .trim_start_matches('"')
+ .trim_end_matches('"');
+
+ let (buffer, context_ranges) = get_buffer(file_path.as_ref())
+ .with_context(|| format!("no buffer for file {file_path}"))?;
+
+ let mut edits = vec![];
+ while let Some(old_text_tag) = parse_tag(&mut input, "old_text")? {
+ let new_text_tag =
+ parse_tag(&mut input, "new_text")?.context("no new_text tag following old_text")?;
+ edits.extend(resolve_new_text_old_text_in_buffer(
+ new_text_tag.body,
+ old_text_tag.body,
+ buffer,
+ context_ranges,
+ )?);
+ }
+
+ Ok((buffer, edits))
+}
+
+fn resolve_new_text_old_text_in_buffer(
+ new_text: &str,
+ old_text: &str,
+ buffer: &TextBufferSnapshot,
+ ranges: &[Range],
+) -> Result, Arc)>, anyhow::Error> {
+ let context_offset = if old_text.is_empty() {
+ Ok(0)
+ } else {
+ let mut offset = None;
+ for range in ranges {
+ let range = range.to_offset(buffer);
+ let text = buffer.text_for_range(range.clone()).collect::();
+ for (match_offset, _) in text.match_indices(old_text) {
+ if offset.is_some() {
+ anyhow::bail!("old_text is not unique enough:\n{}", old_text);
+ }
+ offset = Some(range.start + match_offset);
+ }
+ }
+ offset.ok_or_else(|| anyhow!("Failed to match old_text:\n{}", old_text))
+ }?;
+
+ let edits_within_hunk = language::text_diff(&old_text, &new_text);
+ Ok(edits_within_hunk
+ .into_iter()
+ .map(move |(inner_range, inner_text)| {
+ (
+ buffer.anchor_after(context_offset + inner_range.start)
+ ..buffer.anchor_before(context_offset + inner_range.end),
+ inner_text,
+ )
+ }))
+}
+
+struct ParsedTag<'a> {
+ attributes: &'a str,
+ body: &'a str,
+}
+
+fn parse_tag<'a>(input: &mut &'a str, tag: &str) -> Result