Add reasoning_effort field to OpenAI compatible model configuration (#50582)

Vimsucks created

Some models, like glm-5 and kimi-k2.5, support reasoning but require a
`reasoning_effort` parameter.

This PR adds support for setting `reasoning_effort` for OpenAI-compatible
models.

Tested using the following config:

```json
{
  "language_models": {
    "openai_compatible": {
      "My LiteLLM": {
        "available_models": [
          {
            "name": "glm-5",
            "display_name": "glm-5",
            "max_tokens": 73728,
            "reasoning_effort": "low"
          },
          {
            "name": "kimi-k2.5",
            "display_name": "kimi-k2.5",
            "max_tokens": 262144,
            "reasoning_effort": "low"
          }
        ]
      }
    }
  }
}
```
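For reference, the accepted values mirror OpenAI's `reasoning_effort` parameter; `"low"`, `"medium"`, and `"high"` are the commonly supported levels, with the exact set defined by the `OpenAiReasoningEffort` enum referenced in the settings change below.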

Release Notes:

- Added a setting to control `reasoning_effort` in custom
OpenAI-compatible models

Change summary

crates/agent_ui/src/agent_configuration/add_llm_provider_modal.rs | 1 
crates/language_models/src/provider/open_ai_compatible.rs         | 4 
crates/settings_content/src/language_model.rs                     | 1 
3 files changed, 4 insertions(+), 2 deletions(-)

Detailed changes

crates/agent_ui/src/agent_configuration/add_llm_provider_modal.rs

@@ -202,6 +202,7 @@ impl ModelInput {
                 .text(cx)
                 .parse::<u64>()
                 .map_err(|_| SharedString::from("Max Tokens must be a number"))?,
+            reasoning_effort: None,
             capabilities: ModelCapabilities {
                 tools: self.capabilities.supports_tools.selected(),
                 images: self.capabilities.supports_images.selected(),
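
Note that the Add LLM Provider modal does not expose a reasoning-effort input, so models created through it default to `None`; the field can still be set afterwards in settings JSON, as in the example above.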

crates/language_models/src/provider/open_ai_compatible.rs

@@ -402,7 +402,7 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
                 self.model.capabilities.parallel_tool_calls,
                 self.model.capabilities.prompt_cache_key,
                 self.max_output_tokens(),
-                None,
+                self.model.reasoning_effort.clone(),
             );
             let completions = self.stream_completion(request, cx);
             async move {
@@ -417,7 +417,7 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
                 self.model.capabilities.parallel_tool_calls,
                 self.model.capabilities.prompt_cache_key,
                 self.max_output_tokens(),
-                None,
+                self.model.reasoning_effort.clone(),
             );
             let completions = self.stream_response(request, cx);
             async move {
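
Both the streaming-completion and streaming-response paths now pass the configured effort through the shared request builder instead of a hardcoded `None`. A minimal sketch of the intended effect on the wire format, assuming the request type carries an optional effort field that is skipped when unset (the names here are illustrative stand-ins, not the crate's actual definitions):

```rust
use serde::Serialize;

// Illustrative stand-ins for the real request types in zed's open_ai crate.
#[derive(Serialize, Clone, Debug)]
#[serde(rename_all = "lowercase")]
enum ReasoningEffort {
    Low,
    Medium,
    High,
}

#[derive(Serialize)]
struct ChatRequest {
    model: String,
    // Skipped entirely when the model config leaves it unset, so
    // non-reasoning models see an unchanged request body.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_effort: Option<ReasoningEffort>,
}

fn main() {
    let request = ChatRequest {
        model: "glm-5".into(),
        reasoning_effort: Some(ReasoningEffort::Low),
    };
    // Prints: {"model":"glm-5","reasoning_effort":"low"}
    println!("{}", serde_json::to_string(&request).unwrap());
}
```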

crates/settings_content/src/language_model.rs

@@ -278,6 +278,7 @@ pub struct OpenAiCompatibleAvailableModel {
     pub max_tokens: u64,
     pub max_output_tokens: Option<u64>,
     pub max_completion_tokens: Option<u64>,
+    pub reasoning_effort: Option<OpenAiReasoningEffort>,
     #[serde(default)]
     pub capabilities: OpenAiCompatibleModelCapabilities,
 }
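
On the settings side, serde deserializes a missing key into `None` for `Option` fields, so existing configs without `reasoning_effort` keep working unchanged. A quick sketch of that round trip, again with an assumed lowercase serde representation for `OpenAiReasoningEffort`:

```rust
use serde::Deserialize;

// Assumed variant set; the actual enum lives in zed's settings crates.
#[derive(Deserialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
enum OpenAiReasoningEffort {
    Low,
    Medium,
    High,
}

#[derive(Deserialize, Debug)]
struct AvailableModel {
    name: String,
    max_tokens: u64,
    // A missing key becomes None, preserving backwards compatibility.
    reasoning_effort: Option<OpenAiReasoningEffort>,
}

fn main() {
    let with_effort = r#"{ "name": "glm-5", "max_tokens": 73728, "reasoning_effort": "low" }"#;
    let without = r#"{ "name": "kimi-k2.5", "max_tokens": 262144 }"#;

    let a: AvailableModel = serde_json::from_str(with_effort).unwrap();
    let b: AvailableModel = serde_json::from_str(without).unwrap();

    assert_eq!(a.reasoning_effort, Some(OpenAiReasoningEffort::Low));
    assert_eq!(b.reasoning_effort, None);
}
```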