ollama: Add Qwen3 and Gemma3 (default to 16K context) (#29580)

Created by Peter Tripp

If you have the VRAM, you can increase the context window by adding this to your
settings.json:

```json
  "language_models": {
    "ollama": {
      "available_models": [
        { "max_tokens": 65536, "name": "qwen3", "display_name": "Qwen3-64k" }
      ]
    }
  },
```

Release Notes:

- ollama: Add support for Qwen3 and Gemma3. Both default to a 16K token context.
See the [Assistant Configuration
Docs](https://zed.dev/docs/assistant/configuration#ollama-context) to
increase it.

Change summary

crates/ollama/src/ollama.rs | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

Detailed changes

crates/ollama/src/ollama.rs

@@ -84,7 +84,9 @@ fn get_max_tokens(name: &str) -> usize {
         "mistral" | "codestral" | "mixstral" | "llava" | "qwen2" | "qwen2.5-coder"
         | "dolphin-mixtral" => 32768,
         "llama3.1" | "llama3.2" | "llama3.3" | "phi3" | "phi3.5" | "phi4" | "command-r"
-        | "deepseek-coder-v2" | "deepseek-v3" | "deepseek-r1" | "yi-coder" => 128000,
+        | "qwen3" | "gemma3" | "deepseek-coder-v2" | "deepseek-v3" | "deepseek-r1" | "yi-coder" => {
+            128000
+        }
         _ => DEFAULT_TOKENS,
     }
     .clamp(1, MAXIMUM_TOKENS)
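
Note that although the new match arm returns 128000, the trailing `.clamp(1, MAXIMUM_TOKENS)` caps the result, which is why the title and release notes say these models default to a 16K context. Below is a minimal, self-contained sketch of how this lookup behaves; the constant values (2048 and 16384) and the `:tag` stripping are assumptions inferred from the 16K default mentioned above, not verbatim from the crate:

```rust
// Assumed values for this sketch; check crates/ollama/src/ollama.rs for the
// real constants.
const DEFAULT_TOKENS: usize = 2048;
const MAXIMUM_TOKENS: usize = 16384;

fn get_max_tokens(name: &str) -> usize {
    // Ollama model names may carry a ":tag" suffix (e.g. "qwen3:8b");
    // matching on the base name is an assumption for this sketch.
    let base = name.split(':').next().unwrap_or(name);
    match base {
        "qwen3" | "gemma3" | "deepseek-r1" => 128000,
        _ => DEFAULT_TOKENS,
    }
    // The clamp is what produces the 16K default: even arms that return
    // 128000 are capped at MAXIMUM_TOKENS. A user-supplied max_tokens in
    // available_models (as in the settings.json snippet above) is how you
    // get past this cap.
    .clamp(1, MAXIMUM_TOKENS)
}

fn main() {
    assert_eq!(get_max_tokens("qwen3"), 16384);
    assert_eq!(get_max_tokens("qwen3:8b"), 16384);
    println!("qwen3 default context: {}", get_max_tokens("qwen3"));
}
```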