crates/edit_prediction_cli/evals/.zed/settings.json 🔗
@@ -1,3 +1,4 @@
{
"remove_trailing_whitespace_on_save": false,
+ "soft_wrap": "none",
}
Max Brunsfeld created
In some evals, the teacher produced hallucinations, seemingly due to
context rot. This makes the zeta prompt crate's budgeted rendering
usable by the teacher, so that it can truncate the list of excerpts.
I've also cleaned up the implementation of zeta_prompt's
`format_related_files_within_budget`, and changed the behavior so that
it filters the excerpts by priority but renders the files in their
original order.
Release Notes:
- N/A
crates/edit_prediction_cli/evals/.zed/settings.json | 1
crates/edit_prediction_cli/evals/vscode--add-async-and-await.md | 88
crates/edit_prediction_cli/evals/vscode--add-class-decorator.md | 74
crates/edit_prediction_cli/evals/vscode--add-interface-method.md | 113 +
crates/edit_prediction_cli/src/format_prompt.rs | 28
crates/edit_prediction_cli/src/git.rs | 2
crates/edit_prediction_cli/src/main.rs | 15
crates/edit_prediction_cli/src/synthesize.rs | 11
crates/zeta_prompt/src/zeta_prompt.rs | 207 -
9 files changed, 372 insertions(+), 167 deletions(-)
@@ -1,3 +1,4 @@
{
"remove_trailing_whitespace_on_save": false,
+ "soft_wrap": "none",
}
@@ -0,0 +1,88 @@
++++
+repository_url = "https://github.com/microsoft/vscode"
+revision = "29e6da6efa2287aaa981635a475d425ff4fd5d5c"
++++
+
+## Edit History
+
+```diff
+--- a/src/vs/workbench/contrib/debug/browser/debugCommands.ts
++++ b/src/vs/workbench/contrib/debug/browser/debugCommands.ts
+@@ -304,8 +304,8 @@ CommandsRegistry.registerCommand({
+
+ CommandsRegistry.registerCommand({
+ id: REVERSE_CONTINUE_ID,
+- handler: (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => {
+- getThreadAndRun(accessor, context, thread => thread.reverseContinue());
++ handler: async (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => {
++ await getThreadAndRun(accessor, context, thread => thread.reverseContinue());
+ }
+ });
+--- a/src/vs/workbench/contrib/debug/browser/debugCommands.ts
++++ b/src/vs/workbench/contrib/debug/browser/debugCommands.ts
+@@ -311,11 +311,11 @@ CommandsRegistry.registerCommand({
+
+ CommandsRegistry.registerCommand({
+ id: STEP_BACK_ID,
+- handler: (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => {
++ handler: async (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => {
+ const contextKeyService = accessor.get(IContextKeyService);
+ if (CONTEXT_DISASSEMBLY_VIEW_FOCUS.getValue(contextKeyService)) {
+- getThreadAndRun(accessor, context, (thread: IThread) => thread.stepBack('instruction'));
++ await getThreadAndRun(accessor, context, (thread: IThread) => thread.stepBack('instruction'));
+ } else {
+- getThreadAndRun(accessor, context, (thread: IThread) => thread.stepBack());
++ await getThreadAndRun(accessor, context, (thread: IThread) => thread.stepBack());
+ }
+ }
+ });
+--- a/src/vs/workbench/contrib/debug/browser/debugCommands.ts
++++ b/src/vs/workbench/contrib/debug/browser/debugCommands.ts
+@@ -323,8 +323,8 @@ CommandsRegistry.registerCommand({
+
+ CommandsRegistry.registerCommand({
+ id: TERMINATE_THREAD_ID,
+- handler: (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => {
+- getThreadAndRun(accessor, context, thread => thread.terminate());
++ handler: async (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => {
++ await getThreadAndRun(accessor, context, thread => thread.terminate());
+ }
+ });
+```
+
+## Cursor Position
+
+```src/vs/workbench/contrib/debug/browser/debugCommands.ts
+ weight: KeybindingWeight.WorkbenchContrib,
+ primary: isWeb ? (KeyMod.Alt | KeyCode.F10) : KeyCode.F10, // Browsers do not allow F10 to be binded so we have to bind an alternative
+ when: CONTEXT_DEBUG_STATE.isEqualTo('stopped'),
+ handler: (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => {
+ // ^[CURSOR_POSITION]
+ const contextKeyService = accessor.get(IContextKeyService);
+ if (CONTEXT_DISASSEMBLY_VIEW_FOCUS.getValue(contextKeyService)) {
+ getThreadAndRun(accessor, context, (thread: IThread) => thread.next('instruction'));
+ } else {
+```
+
+## Expected Patch
+
+```diff
+--- a/src/vs/workbench/contrib/debug/browser/debugCommands.ts
++++ b/src/vs/workbench/contrib/debug/browser/debugCommands.ts
+@@ -467,10 +467,10 @@ KeybindingsRegistry.registerCommandAndKeybindingRule({
+ weight: KeybindingWeight.WorkbenchContrib,
+ primary: isWeb ? (KeyMod.Alt | KeyCode.F10) : KeyCode.F10, // Browsers do not allow F10 to be binded so we have to bind an alternative
+ when: CONTEXT_DEBUG_STATE.isEqualTo('stopped'),
+- handler: (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => {
++ handler: async (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => {
+ const contextKeyService = accessor.get(IContextKeyService);
+ if (CONTEXT_DISASSEMBLY_VIEW_FOCUS.getValue(contextKeyService)) {
+- getThreadAndRun(accessor, context, (thread: IThread) => thread.next('instruction'));
++ await getThreadAndRun(accessor, context, (thread: IThread) => thread.next('instruction'));
+ } else {
+- getThreadAndRun(accessor, context, (thread: IThread) => thread.next());
++ await getThreadAndRun(accessor, context, (thread: IThread) => thread.next());
+ }
+ }
+ });
+```
@@ -0,0 +1,74 @@
++++
+repository_url = "https://github.com/microsoft/vscode"
+revision = "6f6e26fcdf0a7ca5084e0da284cd7a5b2d41ae4d"
++++
+
+## Edit History
+
+```diff
+--- a/src/vs/workbench/api/common/extHostTypes.ts
++++ b/src/vs/workbench/api/common/extHostTypes.ts
+@@ -18,6 +18,14 @@ import { FileSystemProviderErrorCode, markAsFileSystemProviderError } from 'vs/
+ import type * as vscode from 'vscode';
+
++function es5ClassCompat(target: Function): any {
++ ///@ts-expect-error
++ function _() { return Reflect.construct(target, arguments, this.constructor); }
++ Object.defineProperty(_, 'name', Object.getOwnPropertyDescriptor(target, 'name')!);
++ Object.setPrototypeOf(_, target);
++ Object.setPrototypeOf(_.prototype, target.prototype);
++ return _;
++}
++
++@es5ClassCompat
+ export class Disposable {
+--- a/src/vs/workbench/api/common/extHostTypes.ts
++++ b/src/vs/workbench/api/common/extHostTypes.ts
+@@ -50,6 +58,7 @@ export class Disposable {
+ }
+ }
+
++@es5ClassCompat
+ export class Position {
+
+ static Min(...positions: Position[]): Position {
+--- a/src/vs/workbench/api/common/extHostTypes.ts
++++ b/src/vs/workbench/api/common/extHostTypes.ts
+@@ -220,6 +229,7 @@ export class Position {
+ }
+ }
+
++@es5ClassCompat
+ export class Range {
+
+ static isRange(thing: any): thing is vscode.Range {
+```
+
+## Cursor Position
+
+```src/vs/workbench/api/common/extHostTypes.ts
+ Prepend = 3
+}
+
+export class TextEdit {
+// <[CURSOR_POSITION]
+
+ static isTextEdit(thing: any): thing is TextEdit {
+ if (thing instanceof TextEdit) {
+ return true;
+```
+
+## Expected Patch
+
+```diff
+--- a/src/vs/workbench/api/common/extHostTypes.ts
++++ b/src/vs/workbench/api/common/extHostTypes.ts
+@@ -475,6 +485,7 @@ export enum EnvironmentVariableMutatorType {
+ Prepend = 3
+ }
+
++@es5ClassCompat
+ export class TextEdit {
+
+ static isTextEdit(thing: any): thing is TextEdit {
+```
@@ -0,0 +1,113 @@
++++
+repository_url = "https://github.com/microsoft/vscode"
+revision = "b64eaf598008e2d600a81d846108f72cb37b48e2"
++++
+
+## Edit History
+
+```diff
+--- a/src/vs/platform/window/electron-main/window.ts
++++ b/src/vs/platform/window/electron-main/window.ts
+@@ -1,49 +1,50 @@
+ export interface ICodeWindow extends IDisposable {
+
+ readonly onWillLoad: Event<ILoadEvent>;
+ readonly onDidSignalReady: Event<void>;
++ readonly onDidTriggerSystemContextMenu: Event<{ x: number; y: number }>;
+ readonly onDidClose: Event<void>;
+ readonly onDidDestroy: Event<void>;
+
+ readonly whenClosedOrLoaded: Promise<void>;
+--- a/src/vs/platform/windows/electron-main/window.ts
++++ b/src/vs/platform/windows/electron-main/window.ts
+@@ -63,60 +63,63 @@ const enum ReadyState {
+ export class CodeWindow extends Disposable implements ICodeWindow {
+
+ //#region Events
+
+ private readonly _onWillLoad = this._register(new Emitter<ILoadEvent>());
+ readonly onWillLoad = this._onWillLoad.event;
+
+ private readonly _onDidSignalReady = this._register(new Emitter<void>());
+ readonly onDidSignalReady = this._onDidSignalReady.event;
+
++ private readonly _onDidTriggerSystemContextMenu = this._register(new Emitter<{ x: number; y: number }>());
++ readonly onDidTriggerSystemContextMenu = this._onDidTriggerSystemContextMenu.event;
++
+ private readonly _onDidClose = this._register(new Emitter<void>());
+ readonly onDidClose = this._onDidClose.event;
+
+ private readonly _onDidDestroy = this._register(new Emitter<void>());
+ readonly onDidDestroy = this._onDidDestroy.event;
+
+ //#endregion
+--- a/src/vs/platform/windows/electron-main/windows.ts
++++ b/src/vs/platform/windows/electron-main/windows.ts
+@@ -1,54 +1,55 @@
+ export interface IWindowsMainService {
+
+ readonly _serviceBrand: undefined;
+
+ readonly onDidChangeWindowsCount: Event<IWindowsCountChangedEvent>;
+
+ readonly onDidOpenWindow: Event<ICodeWindow>;
+ readonly onDidSignalReadyWindow: Event<ICodeWindow>;
++ readonly onDidTriggerSystemContextMenu: Event<{ window: ICodeWindow; x: number; y: number }>;
+ readonly onDidDestroyWindow: Event<ICodeWindow>;
+--- a/src/vs/platform/windows/electron-main/windowsMainService.ts
++++ b/src/vs/platform/windows/electron-main/windowsMainService.ts
+@@ -160,60 +160,63 @@ interface ISingleFolderWorkspacePathToOpen extends IPathToOpen {
+ export class WindowsMainService extends Disposable implements IWindowsMainService {
+
+ declare readonly _serviceBrand: undefined;
+
+ private static readonly WINDOWS: ICodeWindow[] = [];
+
+ private readonly _onDidOpenWindow = this._register(new Emitter<ICodeWindow>());
+ readonly onDidOpenWindow = this._onDidOpenWindow.event;
+
+ private readonly _onDidSignalReadyWindow = this._register(new Emitter<ICodeWindow>());
+ readonly onDidSignalReadyWindow = this._onDidSignalReadyWindow.event;
+
+ private readonly _onDidDestroyWindow = this._register(new Emitter<ICodeWindow>());
+ readonly onDidDestroyWindow = this._onDidDestroyWindow.event;
+
+ private readonly _onDidChangeWindowsCount = this._register(new Emitter<IWindowsCountChangedEvent>());
+ readonly onDidChangeWindowsCount = this._onDidChangeWindowsCount.event;
+
++ private readonly _onDidTriggerSystemContextMenu = this._register(new Emitter<{ window: ICodeWindow; x: number; y: number }>());
++ readonly onDidTriggerSystemContextMenu = this._onDidTriggerSystemContextMenu.event;
++
+ private readonly windowsStateHandler = this._register(new WindowsStateHandler(this, this.stateMainService, this.lifecycleMainService, this.logService, this.configurationService));
+```
+
+## Cursor Position
+
+```src/vs/platform/windows/test/electron-main/windowsFinder.test.ts
+ function createTestCodeWindow(options: { lastFocusTime: number; openedFolderUri?: URI; openedWorkspace?: IWorkspaceIdentifier }): ICodeWindow {
+ return new class implements ICodeWindow {
+ onWillLoad: Event<ILoadEvent> = Event.None;
+ onDidSignalReady: Event<void> = Event.None;
+ // <[CURSOR_POSITION]
+ onDidClose: Event<void> = Event.None;
+ onDidDestroy: Event<void> = Event.None;
+ whenClosedOrLoaded: Promise<void> = Promise.resolve();
+ id: number = -1;
+```
+
+## Expected Patch
+
+```diff
+--- a/src/vs/platform/windows/test/electron-main/windowsFinder.test.ts
++++ b/src/vs/platform/windows/test/electron-main/windowsFinder.test.ts
+@@ -7,60 +7,61 @@ import * as assert from 'assert';
+ function createTestCodeWindow(options: { lastFocusTime: number; openedFolderUri?: URI; openedWorkspace?: IWorkspaceIdentifier }): ICodeWindow {
+ return new class implements ICodeWindow {
+ onWillLoad: Event<ILoadEvent> = Event.None;
++ onDidTriggerSystemContextMenu: Event<{ x: number; y: number }> = Event.None;
+ onDidSignalReady: Event<void> = Event.None;
+ onDidClose: Event<void> = Event.None;
+ onDidDestroy: Event<void> = Event.None;
+ whenClosedOrLoaded: Promise<void> = Promise.resolve();
+ id: number = -1;
+```
@@ -9,8 +9,8 @@ use anyhow::{Context as _, Result, anyhow};
use edit_prediction::udiff;
use gpui::AsyncApp;
use similar::DiffableStr;
+use std::ops::Range;
use std::sync::Arc;
-use std::{fmt::Write as _, ops::Range};
use zeta_prompt::{
ZetaFormat, excerpt_range_for_format, format_zeta_prompt, resolve_cursor_region,
};
@@ -258,7 +258,6 @@ impl TeacherPrompt {
pub fn format_context(example: &Example) -> String {
let related_files = example.prompt_inputs.as_ref().map(|pi| &pi.related_files);
-
let Some(related_files) = related_files else {
return "(No context)".to_string();
};
@@ -267,27 +266,10 @@ impl TeacherPrompt {
return "(No context)".to_string();
}
- let mut prompt = String::new();
- for file in related_files {
- let path_str = file.path.to_string_lossy();
- writeln!(&mut prompt, "`````{path_str}").ok();
-
- let mut prev_row = 0;
- for excerpt in &file.excerpts {
- if excerpt.row_range.start > prev_row {
- prompt.push_str("…\n");
- }
- prompt.push_str(&excerpt.text);
- prompt.push('\n');
- prev_row = excerpt.row_range.end;
- }
- if prev_row < file.max_row {
- prompt.push_str("…\n");
- }
- prompt.push_str("\n`````\n");
- }
-
- prompt
+ let prefix = "`````";
+ let suffix = "`````\n\n";
+ let max_tokens = 1024;
+ zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
}
fn format_cursor_excerpt(
@@ -91,7 +91,7 @@ pub async fn ensure_repo_cloned(repo_url: &str) -> Result<PathBuf> {
}
// Always fetch to get latest commits
- run_git(&repo_path, &["fetch", "origin"]).await?;
+ run_git(&repo_path, &["fetch", "--depth", "1000", "origin"]).await?;
// Check if we have a valid HEAD, if not checkout FETCH_HEAD
let has_head = run_git(&repo_path, &["rev-parse", "HEAD"]).await.is_ok();
@@ -39,6 +39,7 @@ use zeta_prompt::ZetaFormat;
use reqwest_client::ReqwestClient;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use std::env;
use std::fmt::Display;
use std::fs::{File, OpenOptions};
use std::hash::{Hash, Hasher};
@@ -900,8 +901,18 @@ fn main() {
}
Command::Synthesize(synth_args) => {
- let Some(output_dir) = args.output else {
- panic!("output dir is required");
+ let output_dir = if let Some(output_dir) = args.output {
+ output_dir
+ } else {
+ let default_output_dir = env::current_dir()
+ .unwrap()
+ .join("crates/edit_prediction_cli/evals-generated");
+ if default_output_dir.parent().unwrap().exists() {
+ std::fs::create_dir(&default_output_dir).ok();
+ default_output_dir
+ } else {
+ panic!("output dir is required");
+ }
};
let config = SynthesizeConfig {
repo_urls: synth_args.repos.clone(),
@@ -284,7 +284,7 @@ fn should_skip_commit(commit: &CommitInfo) -> bool {
.lines()
.filter(|l| l.starts_with('+') || l.starts_with('-'))
.count();
- lines_changed < 10
+ lines_changed < 30
|| lines_changed > 1000
|| is_non_code_commit(commit)
|| is_rename_commit(commit)
@@ -377,10 +377,13 @@ fn build_prompt(repo_url: &str, commit: &CommitInfo) -> String {
indoc! {r#"
You are analyzing a git commit to construct a realistic edit prediction example.
- Your goal is to tell the story of a programmer's editing session: what sequence of changes did they make, and what change logically comes next? We use these examples to train a model to predict edits, so the quality of the EDIT HISTORY is what matters most.
+ Your goal is to tell the story of a programmer's editing session: what sequence
+ of changes did they make, and what change logically comes next? We use these examples
+ to train a model to predict edits, so the quality of the EDIT HISTORY is what matters most.
An edit prediction example consists of:
- 1. **Edit History**: 3-6 hunks showing what the programmer did BEFORE making the expected patch. This is the most important part - it must tell a coherent story of the changes leading up to the prediction.
+ 1. **Edit History**: 2-6 hunks showing what the programmer did BEFORE making the expected patch.
+ This is the most important part - it must tell a coherent story of the changes leading up to the prediction.
2. **Expected Patch**: One small hunk that logically follows from the edit history.
Both single-file and multi-file patterns are acceptable.
@@ -417,7 +420,7 @@ fn build_prompt(repo_url: &str, commit: &CommitInfo) -> String {
First, THINK through whether this commit can support a good example:
1. What is the high-level pattern in this commit?
- 2. Can you identify at least 4 related hunks (3 for edit history + 1 for expected patch)?
+ 2. Can you identify at least 3 related hunks (2 or more for edit history + 1 for expected patch)?
3. What would be the narrative? (First... then... then... finally predict...)
4. Which specific hunk should be the expected patch (the "punchline")?
@@ -358,6 +358,7 @@ fn format_zeta_prompt_with_budget(
let related_files_section = format_related_files_within_budget(
&input.related_files,
"<|file_sep|>",
+ "",
budget_after_edit_history,
);
@@ -430,158 +431,89 @@ fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize
estimate_tokens(len)
}
-fn format_related_files_within_budget(
+pub fn format_related_files_within_budget(
related_files: &[RelatedFile],
- file_marker: &str,
+ file_prefix: &str,
+ file_suffix: &str,
max_tokens: usize,
) -> String {
- // Collect the distinct order values across all excerpts, sorted ascending.
- let mut order_levels: Vec<usize> = related_files
+ struct ExcerptCandidate {
+ file_ix: usize,
+ excerpt_ix: usize,
+ order: usize,
+ }
+
+ let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
.iter()
- .flat_map(|f| f.excerpts.iter().map(|e| e.order))
+ .enumerate()
+ .flat_map(|(file_ix, file)| {
+ file.excerpts
+ .iter()
+ .enumerate()
+ .map(move |(excerpt_ix, e)| ExcerptCandidate {
+ file_ix,
+ excerpt_ix,
+ order: e.order,
+ })
+ })
.collect();
- order_levels.sort_unstable();
- order_levels.dedup();
// Pre-compute file header strings and their token costs.
let file_headers: Vec<String> = related_files
.iter()
.map(|file| {
let path_str = file.path.to_string_lossy();
- format!("{}{}\n", file_marker, path_str)
+ format!("{}{}\n", file_prefix, path_str)
})
.collect();
- // Track which excerpts are included per file.
- let mut included: Vec<Vec<bool>> = related_files
- .iter()
- .map(|file| vec![false; file.excerpts.len()])
- .collect();
- let mut file_included: Vec<bool> = vec![false; related_files.len()];
+ // Sort the excerpts by their order and determine how many fit within the budget.
let mut total_tokens = 0;
-
- // Process order levels from best (lowest) to worst. At each level, try to
- // include all not-yet-included excerpts with that order across all files.
- // If the full level doesn't fit, include a partial prefix (top-to-bottom
- // within each file) and stop — don't proceed to worse order levels.
- 'outer: for &order in &order_levels {
- // Gather the work for this order level: for each file that has excerpts
- // at this order, collect the not-yet-included excerpt indices (in their
- // original positional order) and the token cost to add them (including
- // the file header if the file isn't already included).
- struct FileWork {
- file_idx: usize,
- excerpt_indices: Vec<usize>,
- header_cost: usize,
- excerpt_costs: Vec<usize>,
- }
-
- let mut work_items: Vec<FileWork> = Vec::new();
- for (file_idx, file) in related_files.iter().enumerate() {
- let mut excerpt_indices = Vec::new();
- let mut excerpt_costs = Vec::new();
- for (eidx, excerpt) in file.excerpts.iter().enumerate() {
- if excerpt.order == order && !included[file_idx][eidx] {
- excerpt_indices.push(eidx);
- excerpt_costs.push(excerpt_rendered_tokens(excerpt, file.max_row));
- }
- }
- if excerpt_indices.is_empty() {
- continue;
- }
- let header_cost = if file_included[file_idx] {
- 0
- } else {
- estimate_tokens(file_headers[file_idx].len())
- };
- work_items.push(FileWork {
- file_idx,
- excerpt_indices,
- header_cost,
- excerpt_costs,
- });
- }
-
- // Compute the total cost for this entire order level.
- let level_cost: usize = work_items
- .iter()
- .map(|w| w.header_cost + w.excerpt_costs.iter().sum::<usize>())
- .sum();
-
- if total_tokens + level_cost <= max_tokens {
- // The whole level fits — include everything.
- for work in &work_items {
- total_tokens += work.header_cost;
- file_included[work.file_idx] = true;
- for (i, &eidx) in work.excerpt_indices.iter().enumerate() {
- included[work.file_idx][eidx] = true;
- total_tokens += work.excerpt_costs[i];
- }
- }
+ let mut included_excerpt_count = 0_usize;
+ let mut included_file_indices = vec![false; related_files.len()];
+ excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
+ for candidate in &excerpt_candidates {
+ let file = &related_files[candidate.file_ix];
+ let excerpt = &file.excerpts[candidate.excerpt_ix];
+ let file_already_included = included_file_indices[candidate.file_ix];
+ let header_cost = if file_already_included {
+ 0
} else {
- // The whole level doesn't fit. Include as many excerpts as possible
- // from each file (in positional order), then stop entirely.
- for work in &work_items {
- let available = max_tokens.saturating_sub(total_tokens);
- let mut file_cost = work.header_cost;
-
- let mut count = 0;
- for i in 0..work.excerpt_indices.len() {
- if file_cost + work.excerpt_costs[i] > available {
- break;
- }
- file_cost += work.excerpt_costs[i];
- count += 1;
- }
-
- if count > 0 {
- total_tokens += work.header_cost;
- file_included[work.file_idx] = true;
- for (i, &eidx) in work.excerpt_indices.iter().take(count).enumerate() {
- included[work.file_idx][eidx] = true;
- total_tokens += work.excerpt_costs[i];
- }
- }
- }
- break 'outer;
+ estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
+ };
+ let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
+ if total_tokens + header_cost + excerpt_cost > max_tokens {
+ break;
}
- }
-
- // Determine file rendering order: by the best (lowest) order of any
- // included excerpt, breaking ties by original file index.
- let mut file_order: Vec<(usize, usize)> = Vec::new();
- for (file_idx, file) in related_files.iter().enumerate() {
- if !file_included[file_idx] {
- continue;
+ total_tokens += header_cost + excerpt_cost;
+ if !file_already_included {
+ included_file_indices[candidate.file_ix] = true;
}
- let best_order = file
- .excerpts
- .iter()
- .enumerate()
- .filter(|(eidx, _)| included[file_idx][*eidx])
- .map(|(_, e)| e.order)
- .min()
- .unwrap_or(usize::MAX);
- file_order.push((file_idx, best_order));
+ included_excerpt_count += 1;
}
- file_order.sort_by_key(|&(file_idx, best_order)| (best_order, file_idx));
- // Render included files and excerpts in positional order within each file.
+ excerpt_candidates.truncate(included_excerpt_count);
+ excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
+
+ // Render all of the files that fit within the token budget, in the original order.
let mut result = String::new();
- for &(file_idx, _) in &file_order {
- let file = &related_files[file_idx];
- result.push_str(&file_headers[file_idx]);
- for (eidx, excerpt) in file.excerpts.iter().enumerate() {
- if !included[file_idx][eidx] {
- continue;
- }
- result.push_str(&excerpt.text);
- if !result.ends_with('\n') {
- result.push('\n');
- }
- if excerpt.row_range.end < file.max_row {
- result.push_str("...\n");
+ let mut last_file_ix = None;
+ for candidate in &excerpt_candidates {
+ if last_file_ix != Some(candidate.file_ix) {
+ if last_file_ix.is_some() {
+ result.push_str(file_suffix);
}
+ result.push_str(&file_headers[candidate.file_ix]);
+ last_file_ix = Some(candidate.file_ix);
+ }
+ let file = &related_files[candidate.file_ix];
+ let excerpt = &file.excerpts[candidate.excerpt_ix];
+ result.push_str(&excerpt.text);
+ if !result.ends_with('\n') {
+ result.push('\n');
+ }
+ if excerpt.row_range.end < file.max_row {
+ result.push_str("...\n");
}
}
@@ -958,6 +890,7 @@ pub mod seed_coder {
let related_files_section = super::format_related_files_within_budget(
related_files,
FILE_MARKER,
+ "",
budget_after_edit_history,
);
@@ -1444,14 +1377,14 @@ mod tests {
],
);
- // With large budget, both files included; file_b (order 1) renders before file_a (order 5).
+ // With large budget, both files included; rendered in stable lexicographic order.
assert_eq!(
format_with_budget(&input, 10000),
indoc! {r#"
- <|file_sep|>file_b.rs
- high priority content
<|file_sep|>file_a.rs
low priority content
+ <|file_sep|>file_b.rs
+ high priority content
<|file_sep|>test.rs
<|fim_prefix|>
<|fim_middle|>current
@@ -1757,15 +1690,15 @@ mod tests {
],
);
- // With large budget, both included; high_prio first due to lower order.
+ // With large budget, both included; rendered in stable lexicographic order.
assert_eq!(
format_seed_coder(&input),
indoc! {r#"
<[fim-suffix]>
- <[fim-prefix]><filename>high_prio.rs
- high prio
- <filename>low_prio.rs
+ <[fim-prefix]><filename>low_prio.rs
low prio
+ <filename>high_prio.rs
+ high prio
<filename>test.rs
<<<<<<< CURRENT