diff --git a/crates/edit_prediction_cli/evals/.zed/settings.json b/crates/edit_prediction_cli/evals/.zed/settings.json index f1e74a3aee3b9cd6bb41ec3a87a30c7ad016e379..708c4b864dca9145718fb0b9f6e5457ec705c60b 100644 --- a/crates/edit_prediction_cli/evals/.zed/settings.json +++ b/crates/edit_prediction_cli/evals/.zed/settings.json @@ -1,3 +1,4 @@ { "remove_trailing_whitespace_on_save": false, + "soft_wrap": "none", } diff --git a/crates/edit_prediction_cli/evals/vscode--add-async-and-await.md b/crates/edit_prediction_cli/evals/vscode--add-async-and-await.md new file mode 100644 index 0000000000000000000000000000000000000000..ee070bdb9d7ca98fcf65febac6cbae2e62e530ff --- /dev/null +++ b/crates/edit_prediction_cli/evals/vscode--add-async-and-await.md @@ -0,0 +1,88 @@ ++++ +repository_url = "https://github.com/microsoft/vscode" +revision = "29e6da6efa2287aaa981635a475d425ff4fd5d5c" ++++ + +## Edit History + +```diff +--- a/src/vs/workbench/contrib/debug/browser/debugCommands.ts ++++ b/src/vs/workbench/contrib/debug/browser/debugCommands.ts +@@ -304,8 +304,8 @@ CommandsRegistry.registerCommand({ + + CommandsRegistry.registerCommand({ + id: REVERSE_CONTINUE_ID, +- handler: (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => { +- getThreadAndRun(accessor, context, thread => thread.reverseContinue()); ++ handler: async (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => { ++ await getThreadAndRun(accessor, context, thread => thread.reverseContinue()); + } + }); +--- a/src/vs/workbench/contrib/debug/browser/debugCommands.ts ++++ b/src/vs/workbench/contrib/debug/browser/debugCommands.ts +@@ -311,11 +311,11 @@ CommandsRegistry.registerCommand({ + + CommandsRegistry.registerCommand({ + id: STEP_BACK_ID, +- handler: (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => { ++ handler: async (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => { + const contextKeyService = 
accessor.get(IContextKeyService); + if (CONTEXT_DISASSEMBLY_VIEW_FOCUS.getValue(contextKeyService)) { +- getThreadAndRun(accessor, context, (thread: IThread) => thread.stepBack('instruction')); ++ await getThreadAndRun(accessor, context, (thread: IThread) => thread.stepBack('instruction')); + } else { +- getThreadAndRun(accessor, context, (thread: IThread) => thread.stepBack()); ++ await getThreadAndRun(accessor, context, (thread: IThread) => thread.stepBack()); + } + } + }); +--- a/src/vs/workbench/contrib/debug/browser/debugCommands.ts ++++ b/src/vs/workbench/contrib/debug/browser/debugCommands.ts +@@ -323,8 +323,8 @@ CommandsRegistry.registerCommand({ + + CommandsRegistry.registerCommand({ + id: TERMINATE_THREAD_ID, +- handler: (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => { +- getThreadAndRun(accessor, context, thread => thread.terminate()); ++ handler: async (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => { ++ await getThreadAndRun(accessor, context, thread => thread.terminate()); + } + }); +``` + +## Cursor Position + +```src/vs/workbench/contrib/debug/browser/debugCommands.ts + weight: KeybindingWeight.WorkbenchContrib, + primary: isWeb ? 
(KeyMod.Alt | KeyCode.F10) : KeyCode.F10, // Browsers do not allow F10 to be binded so we have to bind an alternative + when: CONTEXT_DEBUG_STATE.isEqualTo('stopped'), + handler: (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => { + // ^[CURSOR_POSITION] + const contextKeyService = accessor.get(IContextKeyService); + if (CONTEXT_DISASSEMBLY_VIEW_FOCUS.getValue(contextKeyService)) { + getThreadAndRun(accessor, context, (thread: IThread) => thread.next('instruction')); + } else { +``` + +## Expected Patch + +```diff +--- a/src/vs/workbench/contrib/debug/browser/debugCommands.ts ++++ b/src/vs/workbench/contrib/debug/browser/debugCommands.ts +@@ -467,10 +467,10 @@ KeybindingsRegistry.registerCommandAndKeybindingRule({ + weight: KeybindingWeight.WorkbenchContrib, + primary: isWeb ? (KeyMod.Alt | KeyCode.F10) : KeyCode.F10, // Browsers do not allow F10 to be binded so we have to bind an alternative + when: CONTEXT_DEBUG_STATE.isEqualTo('stopped'), +- handler: (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => { ++ handler: async (accessor: ServicesAccessor, _: string, context: CallStackContext | unknown) => { + const contextKeyService = accessor.get(IContextKeyService); + if (CONTEXT_DISASSEMBLY_VIEW_FOCUS.getValue(contextKeyService)) { +- getThreadAndRun(accessor, context, (thread: IThread) => thread.next('instruction')); ++ await getThreadAndRun(accessor, context, (thread: IThread) => thread.next('instruction')); + } else { +- getThreadAndRun(accessor, context, (thread: IThread) => thread.next()); ++ await getThreadAndRun(accessor, context, (thread: IThread) => thread.next()); + } + } + }); +``` diff --git a/crates/edit_prediction_cli/evals/vscode--add-class-decorator.md b/crates/edit_prediction_cli/evals/vscode--add-class-decorator.md new file mode 100644 index 0000000000000000000000000000000000000000..1fd1feb90e24ac52b05139b7fb2bffebb6ce84d6 --- /dev/null +++ 
b/crates/edit_prediction_cli/evals/vscode--add-class-decorator.md @@ -0,0 +1,74 @@ ++++ +repository_url = "https://github.com/microsoft/vscode" +revision = "6f6e26fcdf0a7ca5084e0da284cd7a5b2d41ae4d" ++++ + +## Edit History + +```diff +--- a/src/vs/workbench/api/common/extHostTypes.ts ++++ b/src/vs/workbench/api/common/extHostTypes.ts +@@ -18,6 +18,14 @@ import { FileSystemProviderErrorCode, markAsFileSystemProviderError } from 'vs/ + import type * as vscode from 'vscode'; + ++function es5ClassCompat(target: Function): any { ++ ///@ts-expect-error ++ function _() { return Reflect.construct(target, arguments, this.constructor); } ++ Object.defineProperty(_, 'name', Object.getOwnPropertyDescriptor(target, 'name')!); ++ Object.setPrototypeOf(_, target); ++ Object.setPrototypeOf(_.prototype, target.prototype); ++ return _; ++} ++ ++@es5ClassCompat + export class Disposable { +--- a/src/vs/workbench/api/common/extHostTypes.ts ++++ b/src/vs/workbench/api/common/extHostTypes.ts +@@ -50,6 +58,7 @@ export class Disposable { + } + } + ++@es5ClassCompat + export class Position { + + static Min(...positions: Position[]): Position { +--- a/src/vs/workbench/api/common/extHostTypes.ts ++++ b/src/vs/workbench/api/common/extHostTypes.ts +@@ -220,6 +229,7 @@ export class Position { + } + } + ++@es5ClassCompat + export class Range { + + static isRange(thing: any): thing is vscode.Range { +``` + +## Cursor Position + +```src/vs/workbench/api/common/extHostTypes.ts + Prepend = 3 +} + +export class TextEdit { +// <[CURSOR_POSITION] + + static isTextEdit(thing: any): thing is TextEdit { + if (thing instanceof TextEdit) { + return true; +``` + +## Expected Patch + +```diff +--- a/src/vs/workbench/api/common/extHostTypes.ts ++++ b/src/vs/workbench/api/common/extHostTypes.ts +@@ -475,6 +485,7 @@ export enum EnvironmentVariableMutatorType { + Prepend = 3 + } + ++@es5ClassCompat + export class TextEdit { + + static isTextEdit(thing: any): thing is TextEdit { +``` diff --git 
a/crates/edit_prediction_cli/evals/vscode--add-interface-method.md b/crates/edit_prediction_cli/evals/vscode--add-interface-method.md new file mode 100644 index 0000000000000000000000000000000000000000..898ebd3bd82bb189baf75527628bb99b7f6345c4 --- /dev/null +++ b/crates/edit_prediction_cli/evals/vscode--add-interface-method.md @@ -0,0 +1,113 @@ ++++ +repository_url = "https://github.com/microsoft/vscode" +revision = "b64eaf598008e2d600a81d846108f72cb37b48e2" ++++ + +## Edit History + +```diff +--- a/src/vs/platform/window/electron-main/window.ts ++++ b/src/vs/platform/window/electron-main/window.ts +@@ -1,49 +1,50 @@ + export interface ICodeWindow extends IDisposable { + + readonly onWillLoad: Event; + readonly onDidSignalReady: Event; ++ readonly onDidTriggerSystemContextMenu: Event<{ x: number; y: number }>; + readonly onDidClose: Event; + readonly onDidDestroy: Event; + + readonly whenClosedOrLoaded: Promise; +--- a/src/vs/platform/windows/electron-main/window.ts ++++ b/src/vs/platform/windows/electron-main/window.ts +@@ -63,60 +63,63 @@ const enum ReadyState { + export class CodeWindow extends Disposable implements ICodeWindow { + + //#region Events + + private readonly _onWillLoad = this._register(new Emitter()); + readonly onWillLoad = this._onWillLoad.event; + + private readonly _onDidSignalReady = this._register(new Emitter()); + readonly onDidSignalReady = this._onDidSignalReady.event; + ++ private readonly _onDidTriggerSystemContextMenu = this._register(new Emitter<{ x: number; y: number }>()); ++ readonly onDidTriggerSystemContextMenu = this._onDidTriggerSystemContextMenu.event; ++ + private readonly _onDidClose = this._register(new Emitter()); + readonly onDidClose = this._onDidClose.event; + + private readonly _onDidDestroy = this._register(new Emitter()); + readonly onDidDestroy = this._onDidDestroy.event; + + //#endregion +--- a/src/vs/platform/windows/electron-main/windows.ts ++++ b/src/vs/platform/windows/electron-main/windows.ts +@@ -1,54 +1,55 @@ 
+ export interface IWindowsMainService { + + readonly _serviceBrand: undefined; + + readonly onDidChangeWindowsCount: Event; + + readonly onDidOpenWindow: Event; + readonly onDidSignalReadyWindow: Event; ++ readonly onDidTriggerSystemContextMenu: Event<{ window: ICodeWindow; x: number; y: number }>; + readonly onDidDestroyWindow: Event; +--- a/src/vs/platform/windows/electron-main/windowsMainService.ts ++++ b/src/vs/platform/windows/electron-main/windowsMainService.ts +@@ -160,60 +160,63 @@ interface ISingleFolderWorkspacePathToOpen extends IPathToOpen { + export class WindowsMainService extends Disposable implements IWindowsMainService { + + declare readonly _serviceBrand: undefined; + + private static readonly WINDOWS: ICodeWindow[] = []; + + private readonly _onDidOpenWindow = this._register(new Emitter()); + readonly onDidOpenWindow = this._onDidOpenWindow.event; + + private readonly _onDidSignalReadyWindow = this._register(new Emitter()); + readonly onDidSignalReadyWindow = this._onDidSignalReadyWindow.event; + + private readonly _onDidDestroyWindow = this._register(new Emitter()); + readonly onDidDestroyWindow = this._onDidDestroyWindow.event; + + private readonly _onDidChangeWindowsCount = this._register(new Emitter()); + readonly onDidChangeWindowsCount = this._onDidChangeWindowsCount.event; + ++ private readonly _onDidTriggerSystemContextMenu = this._register(new Emitter<{ window: ICodeWindow; x: number; y: number }>()); ++ readonly onDidTriggerSystemContextMenu = this._onDidTriggerSystemContextMenu.event; ++ + private readonly windowsStateHandler = this._register(new WindowsStateHandler(this, this.stateMainService, this.lifecycleMainService, this.logService, this.configurationService)); +``` + +## Cursor Position + +```src/vs/platform/windows/test/electron-main/windowsFinder.test.ts + function createTestCodeWindow(options: { lastFocusTime: number; openedFolderUri?: URI; openedWorkspace?: IWorkspaceIdentifier }): ICodeWindow { + return new class implements 
ICodeWindow { + onWillLoad: Event = Event.None; + onDidSignalReady: Event = Event.None; + // <[CURSOR_POSITION] + onDidClose: Event = Event.None; + onDidDestroy: Event = Event.None; + whenClosedOrLoaded: Promise = Promise.resolve(); + id: number = -1; +``` + +## Expected Patch + +```diff +--- a/src/vs/platform/windows/test/electron-main/windowsFinder.test.ts ++++ b/src/vs/platform/windows/test/electron-main/windowsFinder.test.ts +@@ -7,60 +7,61 @@ import * as assert from 'assert'; + function createTestCodeWindow(options: { lastFocusTime: number; openedFolderUri?: URI; openedWorkspace?: IWorkspaceIdentifier }): ICodeWindow { + return new class implements ICodeWindow { + onWillLoad: Event = Event.None; ++ onDidTriggerSystemContextMenu: Event<{ x: number; y: number }> = Event.None; + onDidSignalReady: Event = Event.None; + onDidClose: Event = Event.None; + onDidDestroy: Event = Event.None; + whenClosedOrLoaded: Promise = Promise.resolve(); + id: number = -1; +``` diff --git a/crates/edit_prediction_cli/src/format_prompt.rs b/crates/edit_prediction_cli/src/format_prompt.rs index d8fd613ee8d6e1323c8ca0521ca67c837e9fb225..6cdfeef8f569df9277d3417c0134b2c7047bee30 100644 --- a/crates/edit_prediction_cli/src/format_prompt.rs +++ b/crates/edit_prediction_cli/src/format_prompt.rs @@ -9,8 +9,8 @@ use anyhow::{Context as _, Result, anyhow}; use edit_prediction::udiff; use gpui::AsyncApp; use similar::DiffableStr; +use std::ops::Range; use std::sync::Arc; -use std::{fmt::Write as _, ops::Range}; use zeta_prompt::{ ZetaFormat, excerpt_range_for_format, format_zeta_prompt, resolve_cursor_region, }; @@ -258,7 +258,6 @@ impl TeacherPrompt { pub fn format_context(example: &Example) -> String { let related_files = example.prompt_inputs.as_ref().map(|pi| &pi.related_files); - let Some(related_files) = related_files else { return "(No context)".to_string(); }; @@ -267,27 +266,10 @@ impl TeacherPrompt { return "(No context)".to_string(); } - let mut prompt = String::new(); - for file in 
related_files { - let path_str = file.path.to_string_lossy(); - writeln!(&mut prompt, "`````{path_str}").ok(); - - let mut prev_row = 0; - for excerpt in &file.excerpts { - if excerpt.row_range.start > prev_row { - prompt.push_str("…\n"); - } - prompt.push_str(&excerpt.text); - prompt.push('\n'); - prev_row = excerpt.row_range.end; - } - if prev_row < file.max_row { - prompt.push_str("…\n"); - } - prompt.push_str("\n`````\n"); - } - - prompt + let prefix = "`````"; + let suffix = "`````\n\n"; + let max_tokens = 1024; + zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens) } fn format_cursor_excerpt( diff --git a/crates/edit_prediction_cli/src/git.rs b/crates/edit_prediction_cli/src/git.rs index dea6637d4330b671e4b59c436a933450a762328c..59ce3aba40eb162313035cbfe1c9356488ba23ed 100644 --- a/crates/edit_prediction_cli/src/git.rs +++ b/crates/edit_prediction_cli/src/git.rs @@ -91,7 +91,7 @@ pub async fn ensure_repo_cloned(repo_url: &str) -> Result { } // Always fetch to get latest commits - run_git(&repo_path, &["fetch", "origin"]).await?; + run_git(&repo_path, &["fetch", "--depth", "1000", "origin"]).await?; // Check if we have a valid HEAD, if not checkout FETCH_HEAD let has_head = run_git(&repo_path, &["rev-parse", "HEAD"]).await.is_ok(); diff --git a/crates/edit_prediction_cli/src/main.rs b/crates/edit_prediction_cli/src/main.rs index a6a0b2e3145cefbe7dd84a88733fe5d865b6364b..1ab126d32ee19b2eb754f4ad31fbaf38ed5eaafc 100644 --- a/crates/edit_prediction_cli/src/main.rs +++ b/crates/edit_prediction_cli/src/main.rs @@ -39,6 +39,7 @@ use zeta_prompt::ZetaFormat; use reqwest_client::ReqwestClient; use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::env; use std::fmt::Display; use std::fs::{File, OpenOptions}; use std::hash::{Hash, Hasher}; @@ -900,8 +901,18 @@ fn main() { } Command::Synthesize(synth_args) => { - let Some(output_dir) = args.output else { - panic!("output dir is required"); + let output_dir = if 
let Some(output_dir) = args.output { + output_dir + } else { + let default_output_dir = env::current_dir() + .unwrap() + .join("crates/edit_prediction_cli/evals-generated"); + if default_output_dir.parent().unwrap().exists() { + std::fs::create_dir(&default_output_dir).ok(); + default_output_dir + } else { + panic!("output dir is required"); + } }; let config = SynthesizeConfig { repo_urls: synth_args.repos.clone(), diff --git a/crates/edit_prediction_cli/src/synthesize.rs b/crates/edit_prediction_cli/src/synthesize.rs index 3977804a8fc686e547d5b518bc64bd836a1afc7f..228690ae49eb1bdcedd8b8f7e0804c65a62213f5 100644 --- a/crates/edit_prediction_cli/src/synthesize.rs +++ b/crates/edit_prediction_cli/src/synthesize.rs @@ -284,7 +284,7 @@ fn should_skip_commit(commit: &CommitInfo) -> bool { .lines() .filter(|l| l.starts_with('+') || l.starts_with('-')) .count(); - lines_changed < 10 + lines_changed < 30 || lines_changed > 1000 || is_non_code_commit(commit) || is_rename_commit(commit) @@ -377,10 +377,13 @@ fn build_prompt(repo_url: &str, commit: &CommitInfo) -> String { indoc! {r#" You are analyzing a git commit to construct a realistic edit prediction example. - Your goal is to tell the story of a programmer's editing session: what sequence of changes did they make, and what change logically comes next? We use these examples to train a model to predict edits, so the quality of the EDIT HISTORY is what matters most. + Your goal is to tell the story of a programmer's editing session: what sequence + of changes did they make, and what change logically comes next? We use these examples + to train a model to predict edits, so the quality of the EDIT HISTORY is what matters most. An edit prediction example consists of: - 1. **Edit History**: 3-6 hunks showing what the programmer did BEFORE making the expected patch. This is the most important part - it must tell a coherent story of the changes leading up to the prediction. + 1. 
**Edit History**: 2-6 hunks showing what the programmer did BEFORE making the expected patch. + This is the most important part - it must tell a coherent story of the changes leading up to the prediction. 2. **Expected Patch**: One small hunk that logically follows from the edit history. Both single-file and multi-file patterns are acceptable. @@ -417,7 +420,7 @@ fn build_prompt(repo_url: &str, commit: &CommitInfo) -> String { First, THINK through whether this commit can support a good example: 1. What is the high-level pattern in this commit? - 2. Can you identify at least 4 related hunks (3 for edit history + 1 for expected patch)? + 2. Can you identify at least 3 related hunks (2 or more for edit history + 1 for expected patch)? 3. What would be the narrative? (First... then... then... finally predict...) 4. Which specific hunk should be the expected patch (the "punchline")? diff --git a/crates/zeta_prompt/src/zeta_prompt.rs b/crates/zeta_prompt/src/zeta_prompt.rs index 95110bae009d1fc40766f741e4aad06b4c10ca6c..bdd5afffa975adc11176928a89e4cb52b4cd69c3 100644 --- a/crates/zeta_prompt/src/zeta_prompt.rs +++ b/crates/zeta_prompt/src/zeta_prompt.rs @@ -358,6 +358,7 @@ fn format_zeta_prompt_with_budget( let related_files_section = format_related_files_within_budget( &input.related_files, "<|file_sep|>", + "", budget_after_edit_history, ); @@ -430,158 +431,89 @@ fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize estimate_tokens(len) } -fn format_related_files_within_budget( +pub fn format_related_files_within_budget( related_files: &[RelatedFile], - file_marker: &str, + file_prefix: &str, + file_suffix: &str, max_tokens: usize, ) -> String { - // Collect the distinct order values across all excerpts, sorted ascending. 
- let mut order_levels: Vec = related_files + struct ExcerptCandidate { + file_ix: usize, + excerpt_ix: usize, + order: usize, + } + + let mut excerpt_candidates: Vec = related_files .iter() - .flat_map(|f| f.excerpts.iter().map(|e| e.order)) + .enumerate() + .flat_map(|(file_ix, file)| { + file.excerpts + .iter() + .enumerate() + .map(move |(excerpt_ix, e)| ExcerptCandidate { + file_ix, + excerpt_ix, + order: e.order, + }) + }) .collect(); - order_levels.sort_unstable(); - order_levels.dedup(); // Pre-compute file header strings and their token costs. let file_headers: Vec = related_files .iter() .map(|file| { let path_str = file.path.to_string_lossy(); - format!("{}{}\n", file_marker, path_str) + format!("{}{}\n", file_prefix, path_str) }) .collect(); - // Track which excerpts are included per file. - let mut included: Vec> = related_files - .iter() - .map(|file| vec![false; file.excerpts.len()]) - .collect(); - let mut file_included: Vec = vec![false; related_files.len()]; + // Sort the excerpts by their order and determine how many fit within the budget. let mut total_tokens = 0; - - // Process order levels from best (lowest) to worst. At each level, try to - // include all not-yet-included excerpts with that order across all files. - // If the full level doesn't fit, include a partial prefix (top-to-bottom - // within each file) and stop — don't proceed to worse order levels. - 'outer: for &order in &order_levels { - // Gather the work for this order level: for each file that has excerpts - // at this order, collect the not-yet-included excerpt indices (in their - // original positional order) and the token cost to add them (including - // the file header if the file isn't already included). 
- struct FileWork { - file_idx: usize, - excerpt_indices: Vec, - header_cost: usize, - excerpt_costs: Vec, - } - - let mut work_items: Vec = Vec::new(); - for (file_idx, file) in related_files.iter().enumerate() { - let mut excerpt_indices = Vec::new(); - let mut excerpt_costs = Vec::new(); - for (eidx, excerpt) in file.excerpts.iter().enumerate() { - if excerpt.order == order && !included[file_idx][eidx] { - excerpt_indices.push(eidx); - excerpt_costs.push(excerpt_rendered_tokens(excerpt, file.max_row)); - } - } - if excerpt_indices.is_empty() { - continue; - } - let header_cost = if file_included[file_idx] { - 0 - } else { - estimate_tokens(file_headers[file_idx].len()) - }; - work_items.push(FileWork { - file_idx, - excerpt_indices, - header_cost, - excerpt_costs, - }); - } - - // Compute the total cost for this entire order level. - let level_cost: usize = work_items - .iter() - .map(|w| w.header_cost + w.excerpt_costs.iter().sum::()) - .sum(); - - if total_tokens + level_cost <= max_tokens { - // The whole level fits — include everything. - for work in &work_items { - total_tokens += work.header_cost; - file_included[work.file_idx] = true; - for (i, &eidx) in work.excerpt_indices.iter().enumerate() { - included[work.file_idx][eidx] = true; - total_tokens += work.excerpt_costs[i]; - } - } + let mut included_excerpt_count = 0_usize; + let mut included_file_indices = vec![false; related_files.len()]; + excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix)); + for candidate in &excerpt_candidates { + let file = &related_files[candidate.file_ix]; + let excerpt = &file.excerpts[candidate.excerpt_ix]; + let file_already_included = included_file_indices[candidate.file_ix]; + let header_cost = if file_already_included { + 0 } else { - // The whole level doesn't fit. Include as many excerpts as possible - // from each file (in positional order), then stop entirely. 
- for work in &work_items { - let available = max_tokens.saturating_sub(total_tokens); - let mut file_cost = work.header_cost; - - let mut count = 0; - for i in 0..work.excerpt_indices.len() { - if file_cost + work.excerpt_costs[i] > available { - break; - } - file_cost += work.excerpt_costs[i]; - count += 1; - } - - if count > 0 { - total_tokens += work.header_cost; - file_included[work.file_idx] = true; - for (i, &eidx) in work.excerpt_indices.iter().take(count).enumerate() { - included[work.file_idx][eidx] = true; - total_tokens += work.excerpt_costs[i]; - } - } - } - break 'outer; + estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len()) + }; + let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row); + if total_tokens + header_cost + excerpt_cost > max_tokens { + break; } - } - - // Determine file rendering order: by the best (lowest) order of any - // included excerpt, breaking ties by original file index. - let mut file_order: Vec<(usize, usize)> = Vec::new(); - for (file_idx, file) in related_files.iter().enumerate() { - if !file_included[file_idx] { - continue; + total_tokens += header_cost + excerpt_cost; + if !file_already_included { + included_file_indices[candidate.file_ix] = true; } - let best_order = file - .excerpts - .iter() - .enumerate() - .filter(|(eidx, _)| included[file_idx][*eidx]) - .map(|(_, e)| e.order) - .min() - .unwrap_or(usize::MAX); - file_order.push((file_idx, best_order)); + included_excerpt_count += 1; } - file_order.sort_by_key(|&(file_idx, best_order)| (best_order, file_idx)); - // Render included files and excerpts in positional order within each file. + excerpt_candidates.truncate(included_excerpt_count); + excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix)); + + // Render all of the files that fit within the token budget, in the original order. 
let mut result = String::new(); - for &(file_idx, _) in &file_order { - let file = &related_files[file_idx]; - result.push_str(&file_headers[file_idx]); - for (eidx, excerpt) in file.excerpts.iter().enumerate() { - if !included[file_idx][eidx] { - continue; - } - result.push_str(&excerpt.text); - if !result.ends_with('\n') { - result.push('\n'); - } - if excerpt.row_range.end < file.max_row { - result.push_str("...\n"); + let mut last_file_ix = None; + for candidate in &excerpt_candidates { + if last_file_ix != Some(candidate.file_ix) { + if last_file_ix.is_some() { + result.push_str(file_suffix); } + result.push_str(&file_headers[candidate.file_ix]); + last_file_ix = Some(candidate.file_ix); + } + let file = &related_files[candidate.file_ix]; + let excerpt = &file.excerpts[candidate.excerpt_ix]; + result.push_str(&excerpt.text); + if !result.ends_with('\n') { + result.push('\n'); + } + if excerpt.row_range.end < file.max_row { + result.push_str("...\n"); } } @@ -958,6 +890,7 @@ pub mod seed_coder { let related_files_section = super::format_related_files_within_budget( related_files, FILE_MARKER, + "", budget_after_edit_history, ); @@ -1444,14 +1377,14 @@ mod tests { ], ); - // With large budget, both files included; file_b (order 1) renders before file_a (order 5). + // With large budget, both files included; rendered in original input order, regardless of excerpt priority. assert_eq!( format_with_budget(&input, 10000), indoc! {r#" - <|file_sep|>file_b.rs - high priority content <|file_sep|>file_a.rs low priority content + <|file_sep|>file_b.rs + high priority content <|file_sep|>test.rs <|fim_prefix|> <|fim_middle|>current @@ -1757,15 +1690,15 @@ mod tests { ], ); - // With large budget, both included; high_prio first due to lower order. + // With large budget, both included; rendered in original input order, regardless of excerpt priority. assert_eq!( format_seed_coder(&input), indoc! 
{r#" <[fim-suffix]> - <[fim-prefix]>high_prio.rs - high prio - low_prio.rs + <[fim-prefix]>low_prio.rs low prio + high_prio.rs + high prio test.rs <<<<<<< CURRENT