decrease temperature for inline assist on code content (#3145)

Created by Kyle Caverly

"Temperature" is a parameter in OpenAI GPT models, to control for
randomess in the generated content. To decrease the probability of
either escaping the markdown blocks and creating invalid code, we
decreased temperature for all Non-Prose files. For Markdown or Plain
Text, in which more creativity may be a good thing, we increase the
temperature to allow for more randomness. Along with this, we ask the
generate inline prompt to include only the code and not markdown blocks,
as it appears that lower temperature may decrease the probability of
introducing random markdown blocks.
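
Concretely, the temperature T divides the model's logits before the
softmax, so a token's probability is proportional to exp(logit / T):
values below 1.0 sharpen the distribution toward the most likely
tokens, while 1.0 leaves it unscaled. A minimal, self-contained sketch
of that sampling distribution (illustrative only, not code from this
change):

    /// Temperature-scaled softmax: divides each logit by `t` before
    /// normalizing. As t approaches 0 the distribution collapses onto
    /// the highest logit (near-deterministic); t = 1.0 leaves it unscaled.
    fn softmax_with_temperature(logits: &[f32], t: f32) -> Vec<f32> {
        let scaled: Vec<f32> = logits.iter().map(|l| l / t).collect();
        let max = scaled.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
        let exps: Vec<f32> = scaled.iter().map(|l| (l - max).exp()).collect();
        let sum: f32 = exps.iter().sum();
        exps.iter().map(|e| e / sum).collect()
    }

    fn main() {
        let logits = [2.0, 1.0, 0.5];
        println!("{:?}", softmax_with_temperature(&logits, 1.0)); // softer, more random
        println!("{:?}", softmax_with_temperature(&logits, 0.5)); // sharper, more deterministic
    }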

Release Notes (Internal Only):
- Decrease temperature for inline assist on code content.

Change summary

crates/ai/src/completion.rs             |  2 ++
crates/ai/src/templates/generate.rs     |  2 +-
crates/assistant/src/assistant_panel.rs | 20 ++++++++++++++++++++
3 files changed, 23 insertions(+), 1 deletion(-)

Detailed changes

crates/ai/src/completion.rs

@@ -53,6 +53,8 @@ pub struct OpenAIRequest {
     pub model: String,
     pub messages: Vec<RequestMessage>,
     pub stream: bool,
+    pub stop: Vec<String>,
+    pub temperature: f32,
 }
 
 #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
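
Since the neighboring structs in this file derive Serialize, the two
new fields flow straight into the request body as the stop and
temperature parameters of the OpenAI chat completions API (where
temperature defaults to 1.0, so the conversation paths below preserve
the previous behavior). A standalone sketch of the resulting wire
format, using a trimmed-down copy of the struct with the messages
field omitted:

    use serde::Serialize;

    // Trimmed-down stand-in for OpenAIRequest, assuming the real struct
    // derives Serialize like its neighbors in completion.rs.
    #[derive(Serialize)]
    struct OpenAIRequest {
        model: String,
        stream: bool,
        stop: Vec<String>,
        temperature: f32,
    }

    fn main() -> serde_json::Result<()> {
        let request = OpenAIRequest {
            model: "gpt-4".into(),
            stream: true,
            stop: vec!["|END|>".to_string()],
            temperature: 0.5, // code content; Markdown/unknown stays at 1.0
        };
        // Prints: {"model":"gpt-4","stream":true,"stop":["|END|>"],"temperature":0.5}
        println!("{}", serde_json::to_string(&request)?);
        Ok(())
    }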

crates/ai/src/templates/generate.rs

@@ -78,7 +78,7 @@ impl PromptTemplate for GenerateInlineContent {
 
         match file_type {
             PromptFileType::Code => {
-                writeln!(prompt, "Always wrap your code in a Markdown block.").unwrap();
+                // writeln!(prompt, "Always wrap your code in a Markdown block.").unwrap();
             }
             _ => {}
         }

crates/assistant/src/assistant_panel.rs

@@ -661,6 +661,19 @@ impl AssistantPanel {
             None
         };
 
+        // Higher temperature increases the randomness of model outputs.
+        // For Markdown, or when no language is known, keep it high for more creative output.
+        // For code, lower the temperature to get more deterministic outputs.
+        let temperature = if let Some(language) = language_name.clone() {
+            if language.to_string() != "Markdown".to_string() {
+                0.5
+            } else {
+                1.0
+            }
+        } else {
+            1.0
+        };
+
         let user_prompt = user_prompt.to_string();
 
         let snippets = if retrieve_context {
@@ -731,10 +744,13 @@ impl AssistantPanel {
                 role: Role::User,
                 content: prompt,
             });
+
             let request = OpenAIRequest {
                 model: model.full_name().into(),
                 messages,
                 stream: true,
+                stop: vec!["|END|>".to_string()],
+                temperature,
             };
             codegen.update(&mut cx, |codegen, cx| codegen.start(request, cx));
             anyhow::Ok(())
@@ -1727,6 +1743,8 @@ impl Conversation {
                     .map(|message| message.to_open_ai_message(self.buffer.read(cx)))
                     .collect(),
                 stream: true,
+                stop: vec![],
+                temperature: 1.0,
             };
 
             let stream = stream_completion(api_key, cx.background().clone(), request);
@@ -2011,6 +2029,8 @@ impl Conversation {
                     model: self.model.full_name().to_string(),
                     messages: messages.collect(),
                     stream: true,
+                    stop: vec![],
+                    temperature: 1.0,
                 };
 
                 let stream = stream_completion(api_key, cx.background().clone(), request);
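
As a side note, the language check in the first hunk could be written
as a single match, which also avoids allocating Strings just for the
comparison. A sketch, assuming language_name is an Option<String> (or
anything that derefs to str):

    // Same branching as the if/else above: Markdown and unknown
    // languages keep the creative default of 1.0, everything else
    // drops to 0.5 for more deterministic code output.
    let temperature = match language_name.as_deref() {
        Some("Markdown") | None => 1.0,
        Some(_) => 0.5,
    };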