Allow changing the context window size for Ollama (#44506)

Created by Santiago Bernhardt and Conrad Irwin

Release Notes:

- Changed the context window for Ollama to be configured at the provider level
instead of per model.

---------

Co-authored-by: Conrad Irwin <conrad.irwin@gmail.com>

Change summary

crates/language_models/src/provider/ollama.rs | 137 +++++++++++++++++++-
crates/language_models/src/settings.rs        |   1 
crates/ollama/src/ollama.rs                   |  22 ---
crates/settings_content/src/language_model.rs |   1 
docs/src/ai/llm-providers.md                  |  21 ++
5 files changed, 149 insertions(+), 33 deletions(-)

Detailed changes

crates/language_models/src/provider/ollama.rs 🔗

@@ -45,6 +45,7 @@ pub struct OllamaSettings {
     pub api_url: String,
     pub auto_discover: bool,
     pub available_models: Vec<AvailableModel>,
+    pub context_window: Option<u64>,
 }
 
 pub struct OllamaLanguageModelProvider {
@@ -246,14 +247,20 @@ impl LanguageModelProvider for OllamaLanguageModelProvider {
         let settings = OllamaLanguageModelProvider::settings(cx);
 
         // Add models from the Ollama API
-        if settings.auto_discover {
-            for model in self.state.read(cx).fetched_models.iter() {
-                models.insert(model.name.clone(), model.clone());
+        for model in self.state.read(cx).fetched_models.iter() {
+            let mut model = model.clone();
+            if let Some(context_window) = settings.context_window {
+                model.max_tokens = context_window;
             }
+            models.insert(model.name.clone(), model);
         }
 
         // Override with available models from settings
-        merge_settings_into_models(&mut models, &settings.available_models);
+        merge_settings_into_models(
+            &mut models,
+            &settings.available_models,
+            settings.context_window,
+        );
 
         let mut models = models
             .into_values()
@@ -604,6 +611,7 @@ fn map_to_language_model_completion_events(
 struct ConfigurationView {
     api_key_editor: Entity<InputField>,
     api_url_editor: Entity<InputField>,
+    context_window_editor: Entity<InputField>,
     state: Entity<State>,
 }
 
@@ -617,6 +625,14 @@ impl ConfigurationView {
             input
         });
 
+        let context_window_editor = cx.new(|cx| {
+            let input = InputField::new(window, cx, "8192").label("Context Window");
+            if let Some(context_window) = OllamaLanguageModelProvider::settings(cx).context_window {
+                input.set_text(&context_window.to_string(), window, cx);
+            }
+            input
+        });
+
         cx.observe(&state, |_, _, cx| {
             cx.notify();
         })
@@ -625,6 +641,7 @@ impl ConfigurationView {
         Self {
             api_key_editor,
             api_url_editor,
+            context_window_editor,
             state,
         }
     }
@@ -712,7 +729,57 @@ impl ConfigurationView {
         cx.notify();
     }
 
-    fn render_instructions(cx: &mut Context<Self>) -> Div {
+    fn save_context_window(&mut self, cx: &mut Context<Self>) {
+        let context_window_str = self
+            .context_window_editor
+            .read(cx)
+            .text(cx)
+            .trim()
+            .to_string();
+        let current_context_window = OllamaLanguageModelProvider::settings(cx).context_window;
+
+        if let Ok(context_window) = context_window_str.parse::<u64>() {
+            if Some(context_window) != current_context_window {
+                let fs = <dyn Fs>::global(cx);
+                update_settings_file(fs, cx, move |settings, _| {
+                    settings
+                        .language_models
+                        .get_or_insert_default()
+                        .ollama
+                        .get_or_insert_default()
+                        .context_window = Some(context_window);
+                });
+            }
+        } else if context_window_str.is_empty() && current_context_window.is_some() {
+            let fs = <dyn Fs>::global(cx);
+            update_settings_file(fs, cx, move |settings, _| {
+                settings
+                    .language_models
+                    .get_or_insert_default()
+                    .ollama
+                    .get_or_insert_default()
+                    .context_window = None;
+            });
+        }
+    }
+
+    fn reset_context_window(&mut self, window: &mut Window, cx: &mut Context<Self>) {
+        self.context_window_editor
+            .update(cx, |input, cx| input.set_text("", window, cx));
+        let fs = <dyn Fs>::global(cx);
+        update_settings_file(fs, cx, |settings, _cx| {
+            if let Some(settings) = settings
+                .language_models
+                .as_mut()
+                .and_then(|models| models.ollama.as_mut())
+            {
+                settings.context_window = None;
+            }
+        });
+        cx.notify();
+    }
+
+    fn render_instructions(cx: &App) -> Div {
         v_flex()
             .gap_2()
             .child(Label::new(
@@ -774,6 +841,56 @@ impl ConfigurationView {
         }
     }
 
+    fn render_context_window_editor(&self, cx: &Context<Self>) -> Div {
+        let settings = OllamaLanguageModelProvider::settings(cx);
+        let custom_context_window_set = settings.context_window.is_some();
+
+        if custom_context_window_set {
+            h_flex()
+                .p_3()
+                .justify_between()
+                .rounded_md()
+                .border_1()
+                .border_color(cx.theme().colors().border)
+                .bg(cx.theme().colors().elevated_surface_background)
+                .child(
+                    h_flex()
+                        .gap_2()
+                        .child(Icon::new(IconName::Check).color(Color::Success))
+                        .child(v_flex().gap_1().child(Label::new(format!(
+                            "Context Window: {}",
+                            settings.context_window.unwrap()
+                        )))),
+                )
+                .child(
+                    Button::new("reset-context-window", "Reset")
+                        .label_size(LabelSize::Small)
+                        .icon(IconName::Undo)
+                        .icon_size(IconSize::Small)
+                        .icon_position(IconPosition::Start)
+                        .layer(ElevationIndex::ModalSurface)
+                        .on_click(
+                            cx.listener(|this, _, window, cx| {
+                                this.reset_context_window(window, cx)
+                            }),
+                        ),
+                )
+        } else {
+            v_flex()
+                .on_action(
+                    cx.listener(|this, _: &menu::Confirm, _window, cx| {
+                        this.save_context_window(cx)
+                    }),
+                )
+                .child(self.context_window_editor.clone())
+                .child(
+                    Label::new("Default: Model specific")
+                        .size(LabelSize::Small)
+                        .color(Color::Muted),
+                )
+        }
+    }
+
     fn render_api_url_editor(&self, cx: &Context<Self>) -> Div {
         let api_url = OllamaLanguageModelProvider::api_url(cx);
         let custom_api_url_set = api_url != OLLAMA_API_URL;
@@ -823,6 +940,7 @@ impl Render for ConfigurationView {
             .gap_2()
             .child(Self::render_instructions(cx))
             .child(self.render_api_url_editor(cx))
+            .child(self.render_context_window_editor(cx))
             .child(self.render_api_key_editor(cx))
             .child(
                 h_flex()
@@ -910,10 +1028,13 @@ impl Render for ConfigurationView {
 fn merge_settings_into_models(
     models: &mut HashMap<String, ollama::Model>,
     available_models: &[AvailableModel],
+    context_window: Option<u64>,
 ) {
     for setting_model in available_models {
         if let Some(model) = models.get_mut(&setting_model.name) {
-            model.max_tokens = setting_model.max_tokens;
+            if context_window.is_none() {
+                model.max_tokens = setting_model.max_tokens;
+            }
             model.display_name = setting_model.display_name.clone();
             model.keep_alive = setting_model.keep_alive.clone();
             model.supports_tools = setting_model.supports_tools;
@@ -925,7 +1046,7 @@ fn merge_settings_into_models(
                 ollama::Model {
                     name: setting_model.name.clone(),
                     display_name: setting_model.display_name.clone(),
-                    max_tokens: setting_model.max_tokens,
+                    max_tokens: context_window.unwrap_or(setting_model.max_tokens),
                     keep_alive: setting_model.keep_alive.clone(),
                     supports_tools: setting_model.supports_tools,
                     supports_vision: setting_model.supports_images,
@@ -1003,7 +1124,7 @@ mod tests {
             },
         ];
 
-        merge_settings_into_models(&mut models, &available_models);
+        merge_settings_into_models(&mut models, &available_models, None);
 
         let model_1_5b = models
             .get("qwen2.5-coder:1.5b")

crates/language_models/src/settings.rs 🔗

@@ -81,6 +81,7 @@ impl settings::Settings for AllLanguageModelSettings {
                 api_url: ollama.api_url.unwrap(),
                 auto_discover: ollama.auto_discover.unwrap_or(true),
                 available_models: ollama.available_models.unwrap_or_default(),
+                context_window: ollama.context_window,
             },
             open_router: OpenRouterSettings {
                 api_url: open_router.api_url.unwrap(),

crates/ollama/src/ollama.rs 🔗

@@ -20,27 +20,9 @@ pub struct Model {
     pub supports_thinking: Option<bool>,
 }
 
-fn get_max_tokens(name: &str) -> u64 {
-    /// Default context length for unknown models.
+fn get_max_tokens(_name: &str) -> u64 {
     const DEFAULT_TOKENS: u64 = 4096;
-    /// Magic number. Lets many Ollama models work with ~16GB of ram.
-    /// Models that support context beyond 16k such as codestral (32k) or devstral (128k) will be clamped down to 16k
-    const MAXIMUM_TOKENS: u64 = 16384;
-
-    match name.split(':').next().unwrap() {
-        "granite-code" | "phi" | "tinyllama" => 2048,
-        "llama2" | "stablelm2" | "vicuna" | "yi" => 4096,
-        "aya" | "codegemma" | "gemma" | "gemma2" | "llama3" | "starcoder" => 8192,
-        "codellama" | "starcoder2" => 16384,
-        "codestral" | "dolphin-mixtral" | "llava" | "magistral" | "mistral" | "mixstral"
-        | "qwen2" | "qwen2.5-coder" => 32768,
-        "cogito" | "command-r" | "deepseek-coder-v2" | "deepseek-r1" | "deepseek-v3"
-        | "devstral" | "gemma3" | "gpt-oss" | "granite3.3" | "llama3.1" | "llama3.2"
-        | "llama3.3" | "mistral-nemo" | "phi3" | "phi3.5" | "phi4" | "qwen3" | "yi-coder" => 128000,
-        "qwen3-coder" => 256000,
-        _ => DEFAULT_TOKENS,
-    }
-    .clamp(1, MAXIMUM_TOKENS)
+    DEFAULT_TOKENS
 }
 
 impl Model {

crates/settings_content/src/language_model.rs 🔗

@@ -99,6 +99,7 @@ pub struct OllamaSettingsContent {
     pub api_url: Option<String>,
     pub auto_discover: Option<bool>,
     pub available_models: Option<Vec<OllamaAvailableModel>>,
+    pub context_window: Option<u64>,
 }
 
 #[with_fallible_options]

docs/src/ai/llm-providers.md 🔗

@@ -423,14 +423,23 @@ models are available.
 
 #### Ollama Context Length {#ollama-context}
 
-Zed has pre-configured maximum context lengths (`max_tokens`) to match the capabilities of common models.
-Zed API requests to Ollama include this as the `num_ctx` parameter, but the default values do not exceed `16384` so users with ~16GB of RAM are able to use most models out of the box.
-
-See [get_max_tokens in ollama.rs](https://github.com/zed-industries/zed/blob/main/crates/ollama/src/ollama.rs) for a complete set of defaults.
+Zed API requests to Ollama include the context length as the `num_ctx` parameter. By default, Zed uses a context length of `4096` tokens for all Ollama models.
 
 > **Note**: Token counts displayed in the Agent Panel are only estimates and will differ from the model's native tokenizer.
 
-Depending on your hardware or use-case you may wish to limit or increase the context length for a specific model via settings.json:
+You can set a context length for all Ollama models using the `context_window` setting. This can also be configured in the Ollama provider settings UI:
+
+```json [settings]
+{
+  "language_models": {
+    "ollama": {
+      "context_window": 8192
+    }
+  }
+}
+```
+
+Alternatively, you can configure the context length per-model using the `max_tokens` field in `available_models`:
 
 ```json [settings]
 {
@@ -452,6 +461,8 @@ Depending on your hardware or use-case you may wish to limit or increase the con
 }
 ```
 
+> **Note**: If `context_window` is set, it overrides any per-model `max_tokens` values.
+
 If you specify a context length that is too large for your hardware, Ollama will log an error.
 You can watch these logs by running: `tail -f ~/.ollama/logs/ollama.log` (macOS) or `journalctl -u ollama -f` (Linux).
 Depending on the memory available on your machine, you may need to adjust the context length to a smaller value.