ep: Handle input data errors (#48004)

Oleksiy Syvokon created

Release Notes:

- N/A

Change summary

crates/edit_prediction_cli/src/format_prompt.rs | 22 +++++++++++-------
crates/edit_prediction_cli/src/main.rs          |  9 +++----
2 files changed, 17 insertions(+), 14 deletions(-)

Detailed changes

crates/edit_prediction_cli/src/format_prompt.rs 🔗

@@ -5,7 +5,7 @@ use crate::{
     progress::{ExampleProgress, Step},
     retrieve_context::run_context_retrieval,
 };
-use anyhow::{Context as _, Result};
+use anyhow::{Context as _, Result, anyhow};
 use edit_prediction::cursor_excerpt::editable_and_context_ranges_for_cursor_position;
 use gpui::{AppContext, AsyncApp};
 use language::{Buffer, OffsetRangeExt, Point};
@@ -193,14 +193,14 @@ impl TeacherPrompt {
         // Extract updated (new) editable region from the model response.
         // The model may include editable region markers in its output, so we need to strip them.
         let new_editable_region = extract_last_codeblock(response);
-        let mut new_editable_region = Self::extract_editable_region(&new_editable_region);
+        let mut new_editable_region = Self::extract_editable_region(&new_editable_region)?;
         let old_editable_region = Self::extract_editable_region(
             &example
                 .prompt
                 .as_ref()
                 .context("example prompt missing")?
                 .input,
-        );
+        )?;
         let prompt_inputs = example
             .prompt_inputs
             .as_ref()
@@ -217,7 +217,7 @@ impl TeacherPrompt {
             .content
             .match_indices(&old_editable_region)
             .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset))
-            .unwrap();
+            .context("editable region not found in prompt content")?;
         let editable_region_start_line = prompt_inputs.content[..editable_region_offset]
             .matches('\n')
             .count();
@@ -320,16 +320,20 @@ impl TeacherPrompt {
         result
     }
 
-    fn extract_editable_region(text: &str) -> String {
+    fn extract_editable_region(text: &str) -> Result<String> {
         let start = text
             .rfind(Self::EDITABLE_REGION_START)
             .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
         let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
 
+        if start >= end {
+            return Err(anyhow!("Invalid editable region markers"));
+        }
+
         let region = &text[start..end];
         let region = region.strip_suffix('\n').unwrap_or(region);
 
-        region.replace(Self::USER_CURSOR_MARKER, "")
+        Ok(region.replace(Self::USER_CURSOR_MARKER, ""))
     }
 
     fn is_udiff_content_line(s: &str) -> bool {
@@ -486,7 +490,7 @@ mod tests {
             more
             lines here
             "};
-        let parsed = TeacherPrompt::extract_editable_region(text);
+        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
         assert_eq!(
             parsed,
             indoc::indoc! {"
@@ -538,7 +542,7 @@ mod tests {
         let text = indoc::indoc! {"
             one
             two three"};
-        let parsed = TeacherPrompt::extract_editable_region(text);
+        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
         assert_eq!(
             parsed,
             indoc::indoc! {"
@@ -556,7 +560,7 @@ mod tests {
 
             <|editable_region_end|>
             "};
-        let parsed = TeacherPrompt::extract_editable_region(text);
+        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
         assert_eq!(
             parsed,
             indoc::indoc! {"

crates/edit_prediction_cli/src/main.rs 🔗

@@ -1130,11 +1130,10 @@ async fn handle_error(
         writeln!(file, "{}", serde_json::to_string(example).unwrap())
             .expect("Failed to write to failed.jsonl");
 
-        let cursor_path = example
-            .repo_name()
-            .unwrap()
-            .worktree_path()
-            .join(&example.spec.cursor_path);
+        let cursor_path = match example.repo_name() {
+            Ok(repo_name) => repo_name.worktree_path().join(&example.spec.cursor_path),
+            Err(_) => example.spec.cursor_path.as_ref().to_path_buf(),
+        };
         msg = format!(
             indoc::indoc! {"
                 While processing \"{}\":