From 2eb015d10b5fb72f38c30506eedf25c73df73b7f Mon Sep 17 00:00:00 2001 From: Santiago Bernhardt <23391642+sbe-arg@users.noreply.github.com> Date: Fri, 13 Feb 2026 05:58:24 +1300 Subject: [PATCH] Allow changing the context window size for Ollama (#44506) Release Notes: - Changed the way context window is set for ollama at the provider level instead of per model. --------- Co-authored-by: Conrad Irwin --- crates/language_models/src/provider/ollama.rs | 137 +++++++++++++++++- crates/language_models/src/settings.rs | 1 + crates/ollama/src/ollama.rs | 22 +-- crates/settings_content/src/language_model.rs | 1 + docs/src/ai/llm-providers.md | 21 ++- 5 files changed, 149 insertions(+), 33 deletions(-) diff --git a/crates/language_models/src/provider/ollama.rs b/crates/language_models/src/provider/ollama.rs index da4b4fd51855625c5e21a062957b7e5154968267..27aa00c3f003cd002263875042ab50cf53417d43 100644 --- a/crates/language_models/src/provider/ollama.rs +++ b/crates/language_models/src/provider/ollama.rs @@ -45,6 +45,7 @@ pub struct OllamaSettings { pub api_url: String, pub auto_discover: bool, pub available_models: Vec, + pub context_window: Option, } pub struct OllamaLanguageModelProvider { @@ -246,14 +247,20 @@ impl LanguageModelProvider for OllamaLanguageModelProvider { let settings = OllamaLanguageModelProvider::settings(cx); // Add models from the Ollama API - if settings.auto_discover { - for model in self.state.read(cx).fetched_models.iter() { - models.insert(model.name.clone(), model.clone()); + for model in self.state.read(cx).fetched_models.iter() { + let mut model = model.clone(); + if let Some(context_window) = settings.context_window { + model.max_tokens = context_window; } + models.insert(model.name.clone(), model); } // Override with available models from settings - merge_settings_into_models(&mut models, &settings.available_models); + merge_settings_into_models( + &mut models, + &settings.available_models, + settings.context_window, + ); let mut models = models .into_values() @@ -604,6 +611,7 @@ fn map_to_language_model_completion_events( struct ConfigurationView { api_key_editor: Entity, api_url_editor: Entity, + context_window_editor: Entity, state: Entity, } @@ -617,6 +625,14 @@ impl ConfigurationView { input }); + let context_window_editor = cx.new(|cx| { + let input = InputField::new(window, cx, "8192").label("Context Window"); + if let Some(context_window) = OllamaLanguageModelProvider::settings(cx).context_window { + input.set_text(&context_window.to_string(), window, cx); + } + input + }); + cx.observe(&state, |_, _, cx| { cx.notify(); }) @@ -625,6 +641,7 @@ impl ConfigurationView { Self { api_key_editor, api_url_editor, + context_window_editor, state, } } @@ -712,7 +729,57 @@ impl ConfigurationView { cx.notify(); } - fn render_instructions(cx: &mut Context) -> Div { + fn save_context_window(&mut self, cx: &mut Context) { + let context_window_str = self + .context_window_editor + .read(cx) + .text(cx) + .trim() + .to_string(); + let current_context_window = OllamaLanguageModelProvider::settings(cx).context_window; + + if let Ok(context_window) = context_window_str.parse::() { + if Some(context_window) != current_context_window { + let fs = ::global(cx); + update_settings_file(fs, cx, move |settings, _| { + settings + .language_models + .get_or_insert_default() + .ollama + .get_or_insert_default() + .context_window = Some(context_window); + }); + } + } else if context_window_str.is_empty() && current_context_window.is_some() { + let fs = ::global(cx); + update_settings_file(fs, cx, move |settings, _| { + settings + .language_models + .get_or_insert_default() + .ollama + .get_or_insert_default() + .context_window = None; + }); + } + } + + fn reset_context_window(&mut self, window: &mut Window, cx: &mut Context) { + self.context_window_editor + .update(cx, |input, cx| input.set_text("", window, cx)); + let fs = ::global(cx); + update_settings_file(fs, cx, |settings, _cx| { + if let Some(settings) = settings + .language_models + .as_mut() + .and_then(|models| models.ollama.as_mut()) + { + settings.context_window = None; + } + }); + cx.notify(); + } + + fn render_instructions(cx: &App) -> Div { v_flex() .gap_2() .child(Label::new( @@ -774,6 +841,56 @@ impl ConfigurationView { } } + fn render_context_window_editor(&self, cx: &Context) -> Div { + let settings = OllamaLanguageModelProvider::settings(cx); + let custom_context_window_set = settings.context_window.is_some(); + + if custom_context_window_set { + h_flex() + .p_3() + .justify_between() + .rounded_md() + .border_1() + .border_color(cx.theme().colors().border) + .bg(cx.theme().colors().elevated_surface_background) + .child( + h_flex() + .gap_2() + .child(Icon::new(IconName::Check).color(Color::Success)) + .child(v_flex().gap_1().child(Label::new(format!( + "Context Window: {}", + settings.context_window.unwrap() + )))), + ) + .child( + Button::new("reset-context-window", "Reset") + .label_size(LabelSize::Small) + .icon(IconName::Undo) + .icon_size(IconSize::Small) + .icon_position(IconPosition::Start) + .layer(ElevationIndex::ModalSurface) + .on_click( + cx.listener(|this, _, window, cx| { + this.reset_context_window(window, cx) + }), + ), + ) + } else { + v_flex() + .on_action( + cx.listener(|this, _: &menu::Confirm, _window, cx| { + this.save_context_window(cx) + }), + ) + .child(self.context_window_editor.clone()) + .child( + Label::new("Default: Model specific") + .size(LabelSize::Small) + .color(Color::Muted), + ) + } + } + fn render_api_url_editor(&self, cx: &Context) -> Div { let api_url = OllamaLanguageModelProvider::api_url(cx); let custom_api_url_set = api_url != OLLAMA_API_URL; @@ -823,6 +940,7 @@ impl Render for ConfigurationView { .gap_2() .child(Self::render_instructions(cx)) .child(self.render_api_url_editor(cx)) + .child(self.render_context_window_editor(cx)) .child(self.render_api_key_editor(cx)) .child( h_flex() @@ -910,10 +1028,13 @@ impl Render for ConfigurationView { fn merge_settings_into_models( models: &mut HashMap, available_models: &[AvailableModel], + context_window: Option, ) { for setting_model in available_models { if let Some(model) = models.get_mut(&setting_model.name) { - model.max_tokens = setting_model.max_tokens; + if context_window.is_none() { + model.max_tokens = setting_model.max_tokens; + } model.display_name = setting_model.display_name.clone(); model.keep_alive = setting_model.keep_alive.clone(); model.supports_tools = setting_model.supports_tools; @@ -925,7 +1046,7 @@ fn merge_settings_into_models( ollama::Model { name: setting_model.name.clone(), display_name: setting_model.display_name.clone(), - max_tokens: setting_model.max_tokens, + max_tokens: context_window.unwrap_or(setting_model.max_tokens), keep_alive: setting_model.keep_alive.clone(), supports_tools: setting_model.supports_tools, supports_vision: setting_model.supports_images, @@ -1003,7 +1124,7 @@ mod tests { }, ]; - merge_settings_into_models(&mut models, &available_models); + merge_settings_into_models(&mut models, &available_models, None); let model_1_5b = models .get("qwen2.5-coder:1.5b") diff --git a/crates/language_models/src/settings.rs b/crates/language_models/src/settings.rs index b8f548acbeeac20b4c9af2a8e64de2ed2d805093..512ea05b0c6cfb7d91b39beb8aafb0de7916a78e 100644 --- a/crates/language_models/src/settings.rs +++ b/crates/language_models/src/settings.rs @@ -81,6 +81,7 @@ impl settings::Settings for AllLanguageModelSettings { api_url: ollama.api_url.unwrap(), auto_discover: ollama.auto_discover.unwrap_or(true), available_models: ollama.available_models.unwrap_or_default(), + context_window: ollama.context_window, }, open_router: OpenRouterSettings { api_url: open_router.api_url.unwrap(), diff --git a/crates/ollama/src/ollama.rs b/crates/ollama/src/ollama.rs index ede174654cf76299e7cc09b07612c92a9e3af70f..78a96d018e9c7d27df1fb3efbc9ba1516982fa34 100644 --- a/crates/ollama/src/ollama.rs +++ b/crates/ollama/src/ollama.rs @@ -20,27 +20,9 @@ pub struct Model { pub supports_thinking: Option, } -fn get_max_tokens(name: &str) -> u64 { - /// Default context length for unknown models. +fn get_max_tokens(_name: &str) -> u64 { const DEFAULT_TOKENS: u64 = 4096; - /// Magic number. Lets many Ollama models work with ~16GB of ram. - /// Models that support context beyond 16k such as codestral (32k) or devstral (128k) will be clamped down to 16k - const MAXIMUM_TOKENS: u64 = 16384; - - match name.split(':').next().unwrap() { - "granite-code" | "phi" | "tinyllama" => 2048, - "llama2" | "stablelm2" | "vicuna" | "yi" => 4096, - "aya" | "codegemma" | "gemma" | "gemma2" | "llama3" | "starcoder" => 8192, - "codellama" | "starcoder2" => 16384, - "codestral" | "dolphin-mixtral" | "llava" | "magistral" | "mistral" | "mixstral" - | "qwen2" | "qwen2.5-coder" => 32768, - "cogito" | "command-r" | "deepseek-coder-v2" | "deepseek-r1" | "deepseek-v3" - | "devstral" | "gemma3" | "gpt-oss" | "granite3.3" | "llama3.1" | "llama3.2" - | "llama3.3" | "mistral-nemo" | "phi3" | "phi3.5" | "phi4" | "qwen3" | "yi-coder" => 128000, - "qwen3-coder" => 256000, - _ => DEFAULT_TOKENS, - } - .clamp(1, MAXIMUM_TOKENS) + DEFAULT_TOKENS } impl Model { diff --git a/crates/settings_content/src/language_model.rs b/crates/settings_content/src/language_model.rs index 1f0f338f6d7ac35b9d6862f961bb45f7d2abfb33..4d5e89f9ab7d1e647e82d22767ec2a9b91b80d6d 100644 --- a/crates/settings_content/src/language_model.rs +++ b/crates/settings_content/src/language_model.rs @@ -99,6 +99,7 @@ pub struct OllamaSettingsContent { pub api_url: Option, pub auto_discover: Option, pub available_models: Option>, + pub context_window: Option, } #[with_fallible_options] diff --git a/docs/src/ai/llm-providers.md b/docs/src/ai/llm-providers.md index 6a9b82d7e1dc752e373af576635be22cd44b08ee..6fd2495d98a306dbe4a701f2ce8de1da312340a2 100644 --- a/docs/src/ai/llm-providers.md +++ b/docs/src/ai/llm-providers.md @@ -423,14 +423,23 @@ models are available. #### Ollama Context Length {#ollama-context} -Zed has pre-configured maximum context lengths (`max_tokens`) to match the capabilities of common models. -Zed API requests to Ollama include this as the `num_ctx` parameter, but the default values do not exceed `16384` so users with ~16GB of RAM are able to use most models out of the box. - -See [get_max_tokens in ollama.rs](https://github.com/zed-industries/zed/blob/main/crates/ollama/src/ollama.rs) for a complete set of defaults. +Zed API requests to Ollama include the context length as the `num_ctx` parameter. By default, Zed uses a context length of `4096` tokens for all Ollama models. > **Note**: Token counts displayed in the Agent Panel are only estimates and will differ from the model's native tokenizer. -Depending on your hardware or use-case you may wish to limit or increase the context length for a specific model via settings.json: +You can set a context length for all Ollama models using the `context_window` setting. This can also be configured in the Ollama provider settings UI: + +```json [settings] +{ + "language_models": { + "ollama": { + "context_window": 8192 + } + } +} +``` + +Alternatively, you can configure the context length per-model using the `max_tokens` field in `available_models`: ```json [settings] { @@ -452,6 +461,8 @@ Depending on your hardware or use-case you may wish to limit or increase the con } ``` +> **Note**: If `context_window` is set, it overrides any per-model `max_tokens` values. + If you specify a context length that is too large for your hardware, Ollama will log an error. You can watch these logs by running: `tail -f ~/.ollama/logs/ollama.log` (macOS) or `journalctl -u ollama -f` (Linux). Depending on the memory available on your machine, you may need to adjust the context length to a smaller value.