Inline assistant finishing touches (#44851)

Mikayla Maki created 1 day ago

Tighten up the evals, make the assistant less talkative, get the evals
passing a bit more often, improve telemetry, stream in failure messages,
and turn the feature on for staff.

Release Notes:

- N/A

Change summary

crates/agent_ui/src/buffer_codegen.rs       | 34 +++++----------
crates/agent_ui/src/inline_assistant.rs     | 50 +++++++++++++---------
crates/agent_ui/src/inline_prompt_editor.rs | 24 ++++++++--
crates/feature_flags/src/flags.rs           |  4 -
crates/markdown/src/markdown.rs             |  2 
5 files changed, 60 insertions(+), 54 deletions(-)

Detailed changes

crates/agent_ui/src/buffer_codegen.rs 🔗

@@ -42,29 +42,24 @@ use std::{
 };
 use streaming_diff::{CharOperation, LineDiff, LineOperation, StreamingDiff};
 
-/// Use this tool to provide a message to the user when you're unable to complete a task.
+/// Use this tool when you cannot or should not make a rewrite. This includes:
+/// - The user's request is unclear, ambiguous, or nonsensical
+/// - The requested change cannot be made by only editing the <rewrite_this> section
 #[derive(Debug, Serialize, Deserialize, JsonSchema)]
 pub struct FailureMessageInput {
     /// A brief message to the user explaining why you're unable to fulfill the request or to ask a question about the request.
-    ///
-    /// The message may use markdown formatting if you wish.
     #[serde(default)]
     pub message: String,
 }
 
 /// Replaces text in <rewrite_this></rewrite_this> tags with your replacement_text.
+/// Only use this tool when you are confident you understand the user's request and can fulfill it
+/// by editing the marked section.
 #[derive(Debug, Serialize, Deserialize, JsonSchema)]
 pub struct RewriteSectionInput {
     /// The text to replace the section with.
     #[serde(default)]
     pub replacement_text: String,
-
-    /// A brief description of the edit you have made.
-    ///
-    /// The description may use markdown formatting if you wish.
-    /// This is optional - if the edit is simple or obvious, you should leave it empty.
-    #[serde(default)]
-    pub description: String,
 }
 
 pub struct BufferCodegen {
@@ -401,7 +396,7 @@ impl CodegenAlternative {
         &self.last_equal_ranges
     }
 
-    fn use_streaming_tools(model: &dyn LanguageModel, cx: &App) -> bool {
+    pub fn use_streaming_tools(model: &dyn LanguageModel, cx: &App) -> bool {
         model.supports_streaming_tools()
             && cx.has_flag::<InlineAssistantUseToolFeatureFlag>()
             && AgentSettings::get_global(cx).inline_assistant_use_streaming_tools
@@ -1160,28 +1155,21 @@ impl CodegenAlternative {
             let chars_read_so_far = Arc::new(Mutex::new(0usize));
             let process_tool_use = move |tool_use: LanguageModelToolUse| -> Option<ToolUseOutput> {
                 let mut chars_read_so_far = chars_read_so_far.lock();
-                let is_complete = tool_use.is_input_complete;
                 match tool_use.name.as_ref() {
                     "rewrite_section" => {
-                        let Ok(mut input) =
+                        let Ok(input) =
                             serde_json::from_value::<RewriteSectionInput>(tool_use.input)
                         else {
                             return None;
                         };
                         let text = input.replacement_text[*chars_read_so_far..].to_string();
                         *chars_read_so_far = input.replacement_text.len();
-                        let description = is_complete
-                            .then(|| {
-                                let desc = std::mem::take(&mut input.description);
-                                if desc.is_empty() { None } else { Some(desc) }
-                            })
-                            .flatten();
-                        Some(ToolUseOutput::Rewrite { text, description })
+                        Some(ToolUseOutput::Rewrite {
+                            text,
+                            description: None,
+                        })
                     }
                     "failure_message" => {
-                        if !is_complete {
-                            return None;
-                        }
                         let Ok(mut input) =
                             serde_json::from_value::<FailureMessageInput>(tool_use.input)
                         else {

crates/agent_ui/src/inline_assistant.rs 🔗

@@ -2068,17 +2068,6 @@ pub mod test {
         },
     }
 
-    impl InlineAssistantOutput {
-        pub fn buffer_text(&self) -> &str {
-            match self {
-                InlineAssistantOutput::Success {
-                    full_buffer_text, ..
-                } => full_buffer_text,
-                _ => "",
-            }
-        }
-    }
-
     pub fn run_inline_assistant_test<SetupF, TestF>(
         base_buffer: String,
         prompt: String,
@@ -2253,7 +2242,7 @@ pub mod evals {
     fn eval_cant_do() {
         run_eval(
             20,
-            1.0,
+            0.95,
             "Rename the struct to EvalExampleStructNope",
             indoc::indoc! {"
                 struct EvalExampleStruct {
@@ -2270,7 +2259,7 @@ pub mod evals {
     fn eval_unclear() {
         run_eval(
             20,
-            1.0,
+            0.95,
             "Make exactly the change I want you to make",
             indoc::indoc! {"
                 struct EvalExampleStruct {
@@ -2360,15 +2349,34 @@ pub mod evals {
         correct_output: impl Into<String>,
     ) -> impl Fn(InlineAssistantOutput) -> EvalOutput<()> {
         let correct_output = correct_output.into();
-        move |output| {
-            if output.buffer_text() == correct_output {
-                EvalOutput::passed("Assistant output matches")
-            } else {
-                EvalOutput::failed(format!(
-                    "Assistant output does not match expected output: {:?}",
-                    output
-                ))
+        move |output| match output {
+            InlineAssistantOutput::Success {
+                description,
+                full_buffer_text,
+                ..
+            } => {
+                if full_buffer_text == correct_output && description.is_none() {
+                    EvalOutput::passed("Assistant output matches")
+                } else if full_buffer_text == correct_output {
+                    EvalOutput::failed(format!(
+                        "Assistant output produced an unnecessary description:\n{:?}",
+                        description
+                    ))
+                } else {
+                    EvalOutput::failed(format!(
+                        "Assistant output does not match expected output:\n{:?}\ndescription:\n{:?}",
+                        full_buffer_text, description
+                    ))
+                }
             }
+            o @ InlineAssistantOutput::Failure { .. } => EvalOutput::failed(format!(
+                "Assistant output does not match expected output: {:?}",
+                o
+            )),
+            o @ InlineAssistantOutput::Malformed { .. } => EvalOutput::failed(format!(
+                "Assistant output does not match expected output: {:?}",
+                o
+            )),
         }
     }
 }

crates/agent_ui/src/inline_prompt_editor.rs 🔗

@@ -33,7 +33,7 @@ use workspace::{Toast, Workspace};
 use zed_actions::agent::ToggleModelSelector;
 
 use crate::agent_model_selector::AgentModelSelector;
-use crate::buffer_codegen::BufferCodegen;
+use crate::buffer_codegen::{BufferCodegen, CodegenAlternative};
 use crate::completion_provider::{
     PromptCompletionProvider, PromptCompletionProviderDelegate, PromptContextType,
 };
@@ -585,12 +585,18 @@ impl<T: 'static> PromptEditor<T> {
             }
             CompletionState::Generated { completion_text } => {
                 let model_info = self.model_selector.read(cx).active_model(cx);
-                let model_id = {
+                let (model_id, use_streaming_tools) = {
                     let Some(configured_model) = model_info else {
                         self.toast("No configured model", None, cx);
                         return;
                     };
-                    configured_model.model.telemetry_id()
+                    (
+                        configured_model.model.telemetry_id(),
+                        CodegenAlternative::use_streaming_tools(
+                            configured_model.model.as_ref(),
+                            cx,
+                        ),
+                    )
                 };
 
                 let selected_text = match &self.mode {
@@ -616,6 +622,7 @@ impl<T: 'static> PromptEditor<T> {
                     prompt = prompt,
                     completion = completion_text,
                     selected_text = selected_text,
+                    use_streaming_tools
                 );
 
                 self.session_state.completion = CompletionState::Rated;
@@ -641,12 +648,18 @@ impl<T: 'static> PromptEditor<T> {
             }
             CompletionState::Generated { completion_text } => {
                 let model_info = self.model_selector.read(cx).active_model(cx);
-                let model_telemetry_id = {
+                let (model_telemetry_id, use_streaming_tools) = {
                     let Some(configured_model) = model_info else {
                         self.toast("No configured model", None, cx);
                         return;
                     };
-                    configured_model.model.telemetry_id()
+                    (
+                        configured_model.model.telemetry_id(),
+                        CodegenAlternative::use_streaming_tools(
+                            configured_model.model.as_ref(),
+                            cx,
+                        ),
+                    )
                 };
 
                 let selected_text = match &self.mode {
@@ -672,6 +685,7 @@ impl<T: 'static> PromptEditor<T> {
                     prompt = prompt,
                     completion = completion_text,
                     selected_text = selected_text,
+                    use_streaming_tools
                 );
 
                 self.session_state.completion = CompletionState::Rated;

crates/feature_flags/src/flags.rs 🔗

@@ -16,8 +16,4 @@ pub struct InlineAssistantUseToolFeatureFlag;
 
 impl FeatureFlag for InlineAssistantUseToolFeatureFlag {
     const NAME: &'static str = "inline-assistant-use-tool";
-
-    fn enabled_for_staff() -> bool {
-        false
-    }
 }

crates/markdown/src/markdown.rs 🔗

@@ -251,7 +251,7 @@ impl Markdown {
         self.autoscroll_request = None;
         self.pending_parse = None;
         self.should_reparse = false;
-        self.parsed_markdown = ParsedMarkdown::default();
+        // Don't clear parsed_markdown here - keep existing content visible until new parse completes
         self.parse(cx);
     }