1use crate::{
2 anthropic_client::PlainLlmClient,
3 git::{ensure_repo_cloned, run_git},
4 paths::{FAILED_EXAMPLES_DIR, LATEST_FAILED_EXAMPLES_DIR, SYNTHESIZE_STATE_FILE},
5 progress::{InfoStyle, Progress, Step, StepProgress},
6};
7use anthropic::ResponseContent;
8use anyhow::{Context as _, Result};
9use chrono::Local;
10use collections::{HashMap, HashSet};
11use edit_prediction::{
12 example_spec::ExampleSpec,
13 udiff::{apply_diff_to_string, edits_for_diff},
14};
15use indoc::indoc;
16use serde::{Deserialize, Serialize};
17use std::{
18 path::{Path, PathBuf},
19 sync::Arc,
20};
21
/// Settings for a single `run_synthesize` invocation.
#[derive(Debug, Clone)]
pub struct SynthesizeConfig {
    /// URL of the repository to mine; passed to `ensure_repo_cloned` and used
    /// as the key under which processed commits are recorded in the state.
    pub repo_url: String,
    /// Number of examples to produce; the run stops once this many accepted
    /// examples have been written.
    pub count: usize,
    /// Number of commits requested from `git log` per batch.
    pub max_commits: usize,
    /// Directory that accepted examples are written to (created if missing).
    pub output_dir: PathBuf,
    /// When `true`, start from an empty state instead of loading the state
    /// file, and do not skip commits based on previously recorded progress.
    pub fresh: bool,
}
30
/// Progress persisted between runs as JSON in `SYNTHESIZE_STATE_FILE`.
#[derive(Debug, Default, Serialize, Deserialize)]
struct SynthesizeState {
    /// Per-repository progress, keyed by repository URL.
    repositories: HashMap<String, RepoState>,
}
35
/// Progress recorded for one repository.
#[derive(Debug, Default, Serialize, Deserialize)]
struct RepoState {
    /// SHAs of commits that have already been handled and should be skipped.
    processed_commits: HashSet<String>,
    /// Running sum of the `examples_count` values passed to `mark_processed`.
    examples_generated: usize,
}
41
42impl SynthesizeState {
43 fn load() -> Self {
44 if SYNTHESIZE_STATE_FILE.exists() {
45 std::fs::read_to_string(&*SYNTHESIZE_STATE_FILE)
46 .ok()
47 .and_then(|s| serde_json::from_str(&s).ok())
48 .unwrap_or_default()
49 } else {
50 Self::default()
51 }
52 }
53
54 fn save(&self) -> Result<()> {
55 let content = serde_json::to_string_pretty(self)?;
56 std::fs::write(&*SYNTHESIZE_STATE_FILE, content)?;
57 Ok(())
58 }
59
60 fn is_processed(&self, repo_url: &str, commit_sha: &str) -> bool {
61 self.repositories
62 .get(repo_url)
63 .is_some_and(|repo| repo.processed_commits.contains(commit_sha))
64 }
65
66 fn mark_processed(&mut self, repo_url: &str, commit_sha: &str, examples_count: usize) {
67 let repo = self.repositories.entry(repo_url.to_string()).or_default();
68 repo.processed_commits.insert(commit_sha.to_string());
69 repo.examples_generated += examples_count;
70 }
71}
72
/// One commit as collected by `list_commits`.
#[derive(Debug)]
struct CommitInfo {
    /// Full commit hash (`%H` from `git log`).
    sha: String,
    /// First parent hash (first entry of `%P`).
    parent_sha: String,
    /// Commit subject line (`%s`).
    message: String,
    /// Output of `git show --format=` for the commit; used by the skip checks.
    diff: String,
    /// Output of `git show -U30 --format=`; this is what the prompt embeds.
    expanded_diff: String,
}
81
/// Claude's response parsed into structured form
#[derive(Debug)]
struct ClaudeResponse {
    /// Text after the `NAME:` marker, or "unnamed example" when absent.
    name: String,
    /// Contents of the `ANALYSIS:` section (empty when the section is missing).
    reasoning: String,
    /// Hunks collected from the diff blocks of the `EDIT_HISTORY:` section.
    edit_history_hunks: Vec<String>,
    /// Hunks collected from the diff blocks of the `EXPECTED_PATCH:` section.
    expected_patch_hunks: Vec<String>,
}
90
91pub async fn run_synthesize(config: SynthesizeConfig) -> Result<()> {
92 let mut state = if config.fresh {
93 SynthesizeState::default()
94 } else {
95 SynthesizeState::load()
96 };
97
98 std::fs::create_dir_all(&config.output_dir)?;
99 std::fs::create_dir_all(&*FAILED_EXAMPLES_DIR)?;
100
101 // Create "latest_failed" symlink pointing to this run's failed directory
102 if LATEST_FAILED_EXAMPLES_DIR.is_symlink() {
103 std::fs::remove_file(&*LATEST_FAILED_EXAMPLES_DIR)?;
104 }
105 #[cfg(unix)]
106 std::os::unix::fs::symlink(&*FAILED_EXAMPLES_DIR, &*LATEST_FAILED_EXAMPLES_DIR)?;
107 #[cfg(windows)]
108 std::os::windows::fs::symlink_dir(&*FAILED_EXAMPLES_DIR, &*LATEST_FAILED_EXAMPLES_DIR)?;
109
110 let progress = Progress::global();
111 progress.set_total_examples(config.count);
112
113 let clone_progress = progress.start(Step::Synthesize, "clone");
114 let repo_path = ensure_repo_cloned(&config.repo_url).await?;
115 drop(clone_progress);
116
117 let client = PlainLlmClient::new()?;
118 let mut examples_generated = 0;
119 let mut commits_skipped = 0;
120 let batch_size = config.max_commits;
121
122 'outer: loop {
123 let list_progress = progress.start(Step::Synthesize, "list-commits");
124 let commits = list_commits(&repo_path, batch_size, commits_skipped).await?;
125 drop(list_progress);
126
127 if commits.is_empty() {
128 break;
129 }
130
131 commits_skipped += commits.len();
132
133 for commit in commits {
134 if examples_generated >= config.count {
135 break 'outer;
136 }
137
138 if !config.fresh && state.is_processed(&config.repo_url, &commit.sha) {
139 continue;
140 }
141
142 if should_skip_commit(&commit) {
143 continue;
144 }
145
146 let commit_label = format!(
147 "{} {}",
148 &commit.sha[..8],
149 truncate_message(&commit.message, 40)
150 );
151 let step_progress = Arc::new(progress.start(Step::Synthesize, &commit_label));
152
153 // Single Claude call to identify and copy hunks
154 step_progress.set_substatus("analyzing...");
155 let claude_response =
156 match analyze_commit(&client, &config, &commit, step_progress.clone()).await {
157 Ok(Some(response)) => response,
158 Ok(None) => {
159 step_progress.set_info("no pattern", InfoStyle::Normal);
160 state.mark_processed(&config.repo_url, &commit.sha, 0);
161 state.save()?;
162 continue;
163 }
164 Err(e) => {
165 step_progress.set_info(format!("error: {:?}", e), InfoStyle::Warning);
166 state.mark_processed(&config.repo_url, &commit.sha, 0);
167 state.save()?;
168 continue;
169 }
170 };
171
172 // Validate and build the example
173 step_progress.set_substatus("validating...");
174 match build_example(&config, &commit, &repo_path, &claude_response).await {
175 Ok(spec) => {
176 let timestamp = Local::now().format("%Y-%m-%d--%H-%M-%S");
177 let filename = format!("{}.md", timestamp);
178 let path = config.output_dir.join(&filename);
179 std::fs::write(&path, spec.to_markdown())?;
180 examples_generated += 1;
181 step_progress.set_info(filename, InfoStyle::Normal);
182 }
183 Err(rejection_reason) => {
184 log::debug!("Example rejected: {}", rejection_reason);
185 let timestamp = Local::now().format("%Y-%m-%d--%H-%M-%S%.3f");
186 let filename = format!("{}.md", timestamp);
187 let path = FAILED_EXAMPLES_DIR.join(&filename);
188 let content = format_rejected_example(&claude_response, &rejection_reason);
189 if let Err(e) = std::fs::write(&path, content) {
190 log::warn!("Failed to write rejected example: {:?}", e);
191 }
192 step_progress.set_info(format!("rejected: {}", filename), InfoStyle::Warning);
193 }
194 }
195
196 state.mark_processed(&config.repo_url, &commit.sha, 1);
197 state.save()?;
198 }
199 }
200
201 progress.finalize();
202 Ok(())
203}
204
/// Returns the first line of `msg`, shortened to fit `max_len` bytes.
///
/// Lines that already fit are returned unchanged. Longer lines are cut to at
/// most `max_len - 3` bytes and suffixed with `...`. The cut is moved back to
/// the nearest UTF-8 character boundary so multi-byte text never causes a
/// panic. When `max_len` is below 3 the result is just `...`.
fn truncate_message(msg: &str, max_len: usize) -> String {
    let first_line = msg.lines().next().unwrap_or("");
    if first_line.len() <= max_len {
        return first_line.to_string();
    }

    // Reserve room for the ellipsis without underflowing on tiny limits.
    let mut cut = max_len.saturating_sub(3);
    // Index 0 is always a boundary, so this loop terminates.
    while !first_line.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &first_line[..cut])
}
213
214fn should_skip_commit(commit: &CommitInfo) -> bool {
215 let lines_changed = commit
216 .diff
217 .lines()
218 .filter(|l| l.starts_with('+') || l.starts_with('-'))
219 .count();
220 lines_changed < 10
221 || lines_changed > 1000
222 || is_non_code_commit(commit)
223 || is_rename_commit(commit)
224}
225
226fn is_non_code_commit(commit: &CommitInfo) -> bool {
227 let non_code_extensions = [
228 ".md", ".txt", ".json", ".yaml", ".yml", ".toml", ".lock", ".svg", ".png", ".jpg", ".gif",
229 ".ico", ".woff", ".ttf", ".eot",
230 ];
231
232 let diff_files: Vec<&str> = commit
233 .diff
234 .lines()
235 .filter(|l| l.starts_with("+++ b/") || l.starts_with("--- a/"))
236 .filter_map(|l| {
237 l.strip_prefix("+++ b/")
238 .or_else(|| l.strip_prefix("--- a/"))
239 })
240 .collect();
241
242 if diff_files.is_empty() {
243 return false;
244 }
245
246 diff_files
247 .iter()
248 .all(|f| non_code_extensions.iter().any(|ext| f.ends_with(ext)))
249}
250
251fn is_rename_commit(commit: &CommitInfo) -> bool {
252 commit.diff.contains("similarity index")
253 || commit.diff.contains("rename from")
254 || commit.diff.contains("rename to")
255}
256
257async fn list_commits(
258 repo_path: &Path,
259 max_commits: usize,
260 skip: usize,
261) -> Result<Vec<CommitInfo>> {
262 let output = run_git(
263 repo_path,
264 &[
265 "log",
266 "--no-merges",
267 &format!("--skip={}", skip),
268 &format!("-{}", max_commits),
269 "--format=%H|%P|%s",
270 ],
271 )
272 .await?;
273
274 let mut commits = Vec::new();
275 for line in output.lines() {
276 let parts: Vec<&str> = line.splitn(3, '|').collect();
277 if parts.len() < 3 {
278 continue;
279 }
280 let sha = parts[0].to_string();
281 let parent_sha = parts[1].split_whitespace().next().unwrap_or("").to_string();
282 if parent_sha.is_empty() {
283 continue;
284 }
285
286 // Get standard diff (for skip checks)
287 let diff = run_git(repo_path, &["show", "--format=", &sha])
288 .await
289 .unwrap_or_default();
290
291 // Get expanded diff with 30 lines of context
292 let expanded_diff = run_git(repo_path, &["show", "-U30", "--format=", &sha])
293 .await
294 .unwrap_or_default();
295
296 commits.push(CommitInfo {
297 sha,
298 parent_sha,
299 message: parts[2].to_string(),
300 diff,
301 expanded_diff,
302 });
303 }
304
305 Ok(commits)
306}
307
/// Builds the analysis prompt sent to the model for a single commit.
///
/// The template interpolates `config.repo_url`, `commit.sha`,
/// `commit.message` and `commit.expanded_diff`. It describes what makes a
/// good example and spells out the response format (`NO_PATTERN:` or the
/// `ANALYSIS:` / `NAME:` / `EDIT_HISTORY:` / `EXPECTED_PATCH:` sections)
/// that `parse_claude_response` looks for.
fn build_prompt(config: &SynthesizeConfig, commit: &CommitInfo) -> String {
    format!(
        indoc! {r#"
            You are analyzing a git commit to construct a realistic edit prediction example.

            Your goal is to tell the story of a programmer's editing session: what sequence of changes did they make, and what change logically comes next? We use these examples to train a model to predict edits, so the quality of the EDIT HISTORY is what matters most.

            An edit prediction example consists of:
            1. **Edit History**: 3-6 hunks showing what the programmer did BEFORE making the expected patch. This is the most important part - it must tell a coherent story of the changes leading up to the prediction.
            2. **Expected Patch**: One small hunk that logically follows from the edit history.

            Both single-file and multi-file patterns are acceptable.

            ## What Makes a Good Example

            The edit history should read like a story: "First the programmer changed X, then Y, then Z, and now they need to change W."

            GOOD examples (rich sequences with 3+ steps):
            - Removing a parameter: docstring update → constructor change → field removal → (predict) usage site update
            - Adding a feature: type definition → first usage → second usage → (predict) third usage
            - Bug fix pattern: fix in file A → fix in file B → fix in file C → (predict) fix in file D

            BAD examples (respond NO_PATTERN):
            - Commits where all changes are independent (no narrative thread)
            - Simple find-and-replace (renaming, version bumps)
            - Documentation-only or config-only changes
            - Changes where you can only find 1-2 hunks for the edit history

            ## Commit Information

            Repository: {repo_url}
            Commit: {sha}
            Message: {message}

            ## Diff (30 lines context)

            ```diff
            {expanded_diff}
            ```

            ## Your Task

            First, THINK through whether this commit can support a good example:

            1. What is the high-level pattern in this commit?
            2. Can you identify at least 4 related hunks (3 for edit history + 1 for expected patch)?
            3. What would be the narrative? (First... then... then... finally predict...)
            4. Which specific hunk should be the expected patch (the "punchline")?

            If you cannot construct a coherent 3+ hunk story, respond with just:
            NO_PATTERN: <brief reason>

            If you CAN construct a good example, respond in this format:

            ANALYSIS:
            Pattern: <one sentence describing the pattern>
            Steps:
            1. <file:line-range> - <what this hunk does>
            2. <file:line-range> - <what this hunk does>
            3. <file:line-range> - <what this hunk does>
            4. [EXPECTED PATCH] <file:line-range> - <what this hunk does>

            NAME: <short description, like a commit message, under 60 chars>

            EDIT_HISTORY:

            Hunk 1:
            ```diff
            --- a/src/models/user.py
            +++ b/src/models/user.py
            @@ -15,7 +15,6 @@ class User:
            """A user in the system.

            Attributes:
            - email: The user's email address.
            name: The user's display name.
            """
            ```

            Hunk 2:
            ```diff
            --- a/src/models/user.py
            +++ b/src/models/user.py
            @@ -25,10 +24,9 @@ class User:
            def __init__(
            self,
            name: str,
            - email: str,
            created_at: datetime,
            ):
            self.name = name
            - self.email = email
            self.created_at = created_at
            ```

            Hunk 3:
            ```diff
            --- a/src/api/handlers.py
            +++ b/src/api/handlers.py
            @@ -42,7 +42,6 @@ def create_user(request):
            data = request.json()
            user = User(
            name=data["name"],
            - email=data["email"],
            created_at=datetime.now(),
            )
            return user.save()
            ```

            EXPECTED_PATCH:
            ```diff
            --- a/src/api/handlers.py
            +++ b/src/api/handlers.py
            @@ -58,7 +57,6 @@ def update_user(request, user_id):
            user = User.get(user_id)
            user.name = data.get("name", user.name)
            - user.email = data.get("email", user.email)
            user.save()
            return user
            ```

            ## Requirements for the diffs

            Edit history:
            - MUST have 3-6 hunks (if you cannot find 3+, respond NO_PATTERN instead)
            - Each hunk needs file headers (--- a/path and +++ b/path)
            - Hunks must be valid unified diffs that apply to the parent commit
            - Order hunks as a programmer would naturally make the changes

            Expected patch:
            - Must be a SINGLE hunk from a SINGLE file
            - Must be SMALL: 1-15 changed lines (not counting context)
            - Must be clearly predictable from the edit history narrative
        "#},
        repo_url = config.repo_url,
        sha = commit.sha,
        message = commit.message,
        expanded_diff = commit.expanded_diff,
    )
}
448
449async fn analyze_commit(
450 client: &PlainLlmClient,
451 config: &SynthesizeConfig,
452 commit: &CommitInfo,
453 step_progress: Arc<StepProgress>,
454) -> Result<Option<ClaudeResponse>> {
455 use anthropic::{Message, RequestContent, Role};
456
457 let prompt = build_prompt(config, commit);
458 let messages = vec![Message {
459 role: Role::User,
460 content: vec![RequestContent::Text {
461 text: prompt,
462 cache_control: None,
463 }],
464 }];
465
466 let response = client
467 .generate_streaming("claude-sonnet-4-5", 8192, messages, |chars, _text| {
468 step_progress.set_substatus(format!("analyzing: {:.1}K", chars as f64 / 1000.0));
469 })
470 .await?;
471
472 // Extract text content from response
473 let response_text: String = response
474 .content
475 .iter()
476 .filter_map(|block| {
477 if let ResponseContent::Text { text } = block {
478 Some(text.as_str())
479 } else {
480 None
481 }
482 })
483 .collect::<Vec<_>>()
484 .join("\n");
485
486 parse_claude_response(&response_text)
487}
488
489fn parse_claude_response(response: &str) -> Result<Option<ClaudeResponse>> {
490 // Check for NO_PATTERN
491 if response.contains("NO_PATTERN:") {
492 return Ok(None);
493 }
494
495 // Parse NAME
496 let name = response
497 .lines()
498 .find(|l| l.starts_with("NAME:"))
499 .map(|l| l.strip_prefix("NAME:").unwrap_or("").trim().to_string())
500 .unwrap_or_else(|| "unnamed example".to_string());
501
502 // Parse ANALYSIS section (Claude's planning) - this is the primary reasoning
503 let reasoning = extract_section(
504 response,
505 "ANALYSIS:",
506 &["NAME:", "REASONING:", "EDIT_HISTORY:", "EXPECTED_PATCH:"],
507 )
508 .unwrap_or_default();
509
510 // Parse EDIT_HISTORY diff block
511 let edit_history_hunks = extract_diff_block(response, "EDIT_HISTORY:")?;
512
513 // Parse EXPECTED_PATCH diff block
514 let expected_patch_hunks = extract_diff_block(response, "EXPECTED_PATCH:")?;
515
516 if edit_history_hunks.is_empty() {
517 anyhow::bail!("No edit history hunks found in response");
518 }
519 if expected_patch_hunks.is_empty() {
520 anyhow::bail!("No expected patch hunks found in response");
521 }
522
523 Ok(Some(ClaudeResponse {
524 name,
525 reasoning,
526 edit_history_hunks,
527 expected_patch_hunks,
528 }))
529}
530
/// Returns the trimmed text between the first `start_marker` and the nearest
/// following end marker (or the end of `text` if none follows).
///
/// Returns `None` when `start_marker` does not occur in `text`.
fn extract_section(text: &str, start_marker: &str, end_markers: &[&str]) -> Option<String> {
    let (_, rest) = text.split_once(start_marker)?;

    let mut cutoff = rest.len();
    for marker in end_markers {
        if let Some(position) = rest.find(*marker) {
            cutoff = cutoff.min(position);
        }
    }

    Some(rest[..cutoff].trim().to_string())
}
544
545fn extract_diff_block(text: &str, section_marker: &str) -> Result<Vec<String>> {
546 let section_start = text
547 .find(section_marker)
548 .context(format!("Section {} not found", section_marker))?;
549
550 let after_marker = &text[section_start + section_marker.len()..];
551
552 // Find where the next major section starts (to bound our search)
553 let section_end = ["EXPECTED_PATCH:", "## "]
554 .iter()
555 .filter(|&&m| m != section_marker)
556 .filter_map(|marker| after_marker.find(marker))
557 .min()
558 .unwrap_or(after_marker.len());
559
560 let section_content = &after_marker[..section_end];
561
562 // Collect all ```diff blocks in this section
563 let mut hunks = Vec::new();
564 let mut search_start = 0;
565
566 while let Some(diff_start) = section_content[search_start..].find("```diff") {
567 let abs_diff_start = search_start + diff_start;
568 let block_content_start = section_content[abs_diff_start..]
569 .find('\n')
570 .map(|i| abs_diff_start + i + 1)
571 .unwrap_or(abs_diff_start);
572
573 if let Some(block_end_rel) = section_content[block_content_start..].find("```") {
574 let block_end = block_content_start + block_end_rel;
575 let diff_content = section_content[block_content_start..block_end].trim();
576
577 // Split this block into hunks (in case multiple hunks in one block)
578 hunks.extend(split_into_hunks(diff_content));
579
580 search_start = block_end + 3;
581 } else {
582 break;
583 }
584 }
585
586 if hunks.is_empty() {
587 anyhow::bail!("No diff blocks found in section {}", section_marker);
588 }
589
590 Ok(hunks)
591}
592
/// Splits a diff block into one string per hunk.
///
/// Every returned hunk starts with the file header (`---` / `+++` lines) that
/// was active when the hunk began, if any, followed by the `@@` line and the
/// hunk body. Lines seen before the first `@@` that are not file headers are
/// dropped.
fn split_into_hunks(diff: &str) -> Vec<String> {
    /// Renders buffered hunk lines, prefixed by the active file header if any.
    fn render(header: Option<&str>, body: &[&str]) -> String {
        let joined = body.join("\n");
        match header {
            Some(header) => format!("{}\n{}", header, joined),
            None => joined,
        }
    }

    let mut hunks = Vec::new();
    let mut file_header: Option<String> = None;
    let mut pending: Vec<&str> = Vec::new();
    let mut inside_hunk = false;

    for line in diff.lines() {
        let starts_file = line.starts_with("--- a/") || line.starts_with("--- /");
        let continues_header = line.starts_with("+++ b/") || line.starts_with("+++ /");
        let starts_hunk = line.starts_with("@@ ");

        if starts_file || starts_hunk {
            // Both a new file header and a new hunk close the hunk in progress.
            if inside_hunk && !pending.is_empty() {
                hunks.push(render(file_header.as_deref(), &pending));
                pending.clear();
            }
            if starts_file {
                file_header = Some(line.to_string());
                inside_hunk = false;
            } else {
                pending.push(line);
                inside_hunk = true;
            }
        } else if continues_header {
            // The `+++` line is appended to the `---` line it follows.
            if let Some(header) = file_header.as_mut() {
                header.push('\n');
                header.push_str(line);
            }
        } else if inside_hunk {
            pending.push(line);
        }
    }

    // Flush final hunk
    if !pending.is_empty() {
        hunks.push(render(file_header.as_deref(), &pending));
    }

    hunks
}
652
653/// Validate Claude's output by applying diffs and build the ExampleSpec
654async fn build_example(
655 config: &SynthesizeConfig,
656 commit: &CommitInfo,
657 repo_path: &Path,
658 response: &ClaudeResponse,
659) -> Result<ExampleSpec, String> {
660 // Validate expected patch hunks
661 if response.expected_patch_hunks.len() != 1 {
662 return Err(format!(
663 "Expected exactly 1 expected patch hunk, got {}",
664 response.expected_patch_hunks.len()
665 ));
666 }
667
668 // Parse the expected patch to determine cursor file
669 let expected_patch = &response.expected_patch_hunks[0];
670 let cursor_file = extract_file_from_hunk(expected_patch)
671 .ok_or_else(|| "Could not determine file from expected patch".to_string())?;
672
673 // Get the file content before the commit
674 let before_content = run_git(
675 repo_path,
676 &["show", &format!("{}^:{}", commit.sha, cursor_file)],
677 )
678 .await
679 .map_err(|e| format!("Failed to get file content for {}: {}", cursor_file, e))?;
680
681 // Build edit history diff from Claude's hunks
682 let edit_history = response.edit_history_hunks.join("\n");
683
684 // Apply edit history to get intermediate state (validates edit history)
685 let intermediate_state =
686 apply_edit_history_to_content(&before_content, &edit_history, &cursor_file)?;
687
688 // Validate expected patch applies to intermediate state
689 let expected_patch_with_header = ensure_diff_header(expected_patch, &cursor_file);
690 apply_diff_to_string(&expected_patch_with_header, &intermediate_state)
691 .map_err(|e| format!("Expected patch failed to apply: {}", e))?;
692
693 // Find where the expected patch edits would apply in the intermediate state
694 let edits = edits_for_diff(&intermediate_state, &expected_patch_with_header)
695 .map_err(|e| format!("Failed to parse expected patch: {}", e))?;
696 if edits.is_empty() {
697 return Err(
698 "Could not locate expected patch in file (context not found or ambiguous)".to_string(),
699 );
700 }
701
702 // Use the start of the first edit for cursor positioning
703 let cursor_byte_offset = edits[0].0.start;
704
705 // Extract excerpt around the edit location
706 let (excerpt, cursor_offset) = extract_cursor_excerpt(&intermediate_state, cursor_byte_offset)?;
707
708 // Build the ExampleSpec and use set_cursor_excerpt to format with comment marker
709 let comment_prefix = line_comment_prefix(&cursor_file);
710 let reasoning_with_source = format!(
711 "Source commit: {} ({})\n\n{}",
712 commit.sha,
713 truncate_message(&commit.message, 60),
714 response.reasoning
715 );
716 let mut spec = ExampleSpec {
717 name: response.name.clone(),
718 repository_url: config.repo_url.clone(),
719 revision: commit.parent_sha.clone(),
720 tags: Vec::new(),
721 reasoning: Some(reasoning_with_source),
722 uncommitted_diff: String::new(),
723 cursor_path: Arc::from(Path::new(&cursor_file)),
724 cursor_position: String::new(),
725 edit_history,
726 expected_patches: vec![expected_patch_with_header],
727 };
728 spec.set_cursor_excerpt(&excerpt, cursor_offset, comment_prefix);
729
730 Ok(spec)
731}
732
/// Returns the path from the first line of `hunk` that carries a `+++ b/` or
/// `--- a/` file header, or `None` if no line does.
fn extract_file_from_hunk(hunk: &str) -> Option<String> {
    hunk.lines()
        .find_map(|line| {
            line.strip_prefix("+++ b/")
                .or_else(|| line.strip_prefix("--- a/"))
        })
        .map(str::to_string)
}
745
/// Returns `hunk` unchanged if it already contains a `--- a/` or `+++ b/`
/// marker; otherwise prepends both file header lines for `file_path`.
fn ensure_diff_header(hunk: &str, file_path: &str) -> String {
    let already_has_header = ["--- a/", "+++ b/"]
        .iter()
        .any(|marker| hunk.contains(*marker));

    if already_has_header {
        hunk.to_owned()
    } else {
        format!("--- a/{path}\n+++ b/{path}\n{hunk}", path = file_path, hunk = hunk)
    }
}
753
754/// Apply edit history to file content, only if hunks affect this file
755fn apply_edit_history_to_content(
756 content: &str,
757 edit_history: &str,
758 cursor_file: &str,
759) -> Result<String, String> {
760 // Extract just the hunks for this file from the edit history
761 let file_diff = extract_file_diff_from_combined(edit_history, cursor_file);
762
763 if file_diff.is_empty() {
764 return Ok(content.to_string());
765 }
766
767 apply_diff_to_string(&file_diff, content)
768 .map_err(|e| format!("Failed to apply edit history: {}", e))
769}
770
/// Filters a combined diff down to the sections whose `--- a/` header names
/// `target_file`.
///
/// Kept lines are emitted in order, each terminated by `\n`. Within a
/// matching section, lines between the `---` header and its `+++ b/` line
/// are dropped. The result is empty when the file never appears.
fn extract_file_diff_from_combined(combined_diff: &str, target_file: &str) -> String {
    let mut selected = String::new();
    let mut in_target_file = false;
    let mut header_complete = false;

    for line in combined_diff.lines() {
        let keep = if let Some(old_path) = line.strip_prefix("--- a/") {
            // A new file section starts; decide whether it is ours.
            in_target_file = old_path == target_file;
            if in_target_file {
                header_complete = false;
            }
            in_target_file
        } else if !in_target_file {
            false
        } else if line.starts_with("+++ b/") {
            header_complete = true;
            true
        } else {
            // Body lines only count once the header pair has been seen.
            header_complete
        };

        if keep {
            selected.push_str(line);
            selected.push('\n');
        }
    }

    selected
}
801
/// Builds a short excerpt of `content` around `cursor_byte_offset`.
///
/// The excerpt consists of up to three lines before the cursor's line, the
/// cursor's line itself and up to four lines after it, joined by `\n`.
/// Returns the excerpt together with the cursor's byte offset inside it.
/// The current implementation always returns `Ok`.
fn extract_cursor_excerpt(
    content: &str,
    cursor_byte_offset: usize,
) -> Result<(String, usize), String> {
    const LINES_BEFORE: usize = 3;
    const LINES_AFTER: usize = 4;

    // Bounds of the line that contains the cursor.
    let line_start = content[..cursor_byte_offset]
        .rfind('\n')
        .map_or(0, |newline| newline + 1);
    let line_end = content[cursor_byte_offset..]
        .find('\n')
        .map_or(content.len(), |newline| cursor_byte_offset + newline);

    // Keep only the last few lines that precede the cursor line.
    let preceding: Vec<&str> = content[..line_start].lines().collect();
    let first_kept = preceding.len().saturating_sub(LINES_BEFORE);
    let mut excerpt_lines: Vec<&str> = preceding[first_kept..].to_vec();

    // Each preceding line contributes its bytes plus one newline.
    let bytes_before_cursor_line: usize = excerpt_lines.iter().map(|line| line.len() + 1).sum();
    let cursor_offset_in_excerpt = bytes_before_cursor_line + (cursor_byte_offset - line_start);

    excerpt_lines.push(&content[line_start..line_end]);

    // Skip the newline that ends the cursor line, if there is one.
    let following_start = (line_end + 1).min(content.len());
    excerpt_lines.extend(content[following_start..].lines().take(LINES_AFTER));

    Ok((excerpt_lines.join("\n"), cursor_offset_in_excerpt))
}
857
/// Returns the line-comment prefix for `file_path`, chosen by the text after
/// its last `.` (or by the whole string when there is no `.`).
///
/// Unknown extensions fall back to `//`.
fn line_comment_prefix(file_path: &str) -> &'static str {
    const PREFIX_TABLE: &[(&str, &[&str])] = &[
        (
            "//",
            &[
                "rs", "c", "cpp", "cc", "h", "hpp", "js", "ts", "tsx", "jsx", "go", "java",
                "swift", "kt", "kts", "scala", "cs", "m", "mm", "zig", "v", "d",
            ],
        ),
        (
            "#",
            &[
                "py", "rb", "sh", "bash", "zsh", "pl", "pm", "r", "jl", "yaml", "yml", "toml",
                "coffee", "cr", "ex", "exs", "elixir",
            ],
        ),
        ("--", &["lua", "hs", "sql"]),
        (";", &["lisp", "clj", "cljs", "scm", "rkt", "el"]),
        ("%", &["erl", "hrl"]),
    ];

    let extension = match file_path.rfind('.') {
        Some(dot) => &file_path[dot + 1..],
        None => file_path,
    };

    PREFIX_TABLE
        .iter()
        .find(|(_, extensions)| extensions.contains(&extension))
        .map_or("//", |(prefix, _)| *prefix)
}
872
873fn format_rejected_example(response: &ClaudeResponse, rejection_reason: &str) -> String {
874 let mut content = String::new();
875 content.push_str("# Rejected Example\n\n");
876 content.push_str(&format!("## Name\n\n{}\n\n", response.name));
877 content.push_str(&format!("## Reasoning\n\n{}\n\n", response.reasoning));
878 content.push_str("## Edit History Hunks\n\n```diff\n");
879 for hunk in &response.edit_history_hunks {
880 content.push_str(hunk);
881 content.push_str("\n\n");
882 }
883 content.push_str("```\n\n");
884 content.push_str("## Expected Patch Hunks\n\n```diff\n");
885 for hunk in &response.expected_patch_hunks {
886 content.push_str(hunk);
887 content.push_str("\n\n");
888 }
889 content.push_str("```\n\n");
890 content.push_str(&format!("## Rejection Reason\n\n{}\n", rejection_reason));
891 content
892}