use anyhow::{Result, anyhow};
use futures::{FutureExt, StreamExt, future::BoxFuture, stream::BoxStream};
use futures::{Stream, TryFutureExt, stream};
use gpui::{AnyView, App, AsyncApp, Context, Subscription, Task};
use http_client::HttpClient;
use language_model::{
    AuthenticateError, LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
    LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
    LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
    LanguageModelRequestTool, LanguageModelToolChoice, LanguageModelToolUse,
    LanguageModelToolUseId, MessageContent, RateLimiter, Role, StopReason,
};
use ollama::{
    ChatMessage, ChatOptions, ChatRequest, ChatResponseDelta, KeepAlive, OllamaFunctionTool,
    OllamaToolCall, get_models, preload_model, show_model, stream_chat_completion,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::pin::Pin;
use std::sync::atomic::{AtomicU64, Ordering};
use std::{collections::BTreeMap, sync::Arc};
use ui::{ButtonLike, Indicator, List, prelude::*};
use util::ResultExt;

use crate::AllLanguageModelSettings;
use crate::ui::InstructionListItem;

const OLLAMA_DOWNLOAD_URL: &str = "https://ollama.com/download";
const OLLAMA_LIBRARY_URL: &str = "https://ollama.com/library";
const OLLAMA_SITE: &str = "https://ollama.com/";

const PROVIDER_ID: &str = "ollama";
const PROVIDER_NAME: &str = "Ollama";

#[derive(Default, Debug, Clone, PartialEq)]
pub struct OllamaSettings {
    pub api_url: String,
    pub available_models: Vec<AvailableModel>,
}

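/// A model explicitly declared in Zed's settings, merged over the models
/// discovered from the Ollama API (a settings entry wins on name collisions).
///
/// A minimal sketch of a settings entry, assuming the models live under
/// `language_models.ollama.available_models` in `settings.json`:
///
/// ```json
/// {
///   "name": "llama3.2:latest",
///   "display_name": "Llama 3.2",
///   "max_tokens": 8192,
///   "supports_tools": true
/// }
/// ```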
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
    /// The model name in the Ollama API (e.g. "llama3.2:latest")
    pub name: String,
    /// The model's name as displayed in Zed's UI, such as the model selector
    /// dropdown menu in the assistant panel.
    pub display_name: Option<String>,
    /// The model's context length in tokens (Ollama's `num_ctx`, aka `n_ctx`)
    pub max_tokens: usize,
    /// How long the model stays loaded in memory after the last request
    /// (Ollama's `keep_alive` parameter)
    pub keep_alive: Option<KeepAlive>,
    /// Whether the model supports tool calls
    pub supports_tools: Option<bool>,
    /// Whether the model supports vision (image inputs)
    pub supports_images: Option<bool>,
    /// Whether the model supports thinking, i.e. emitting a separate
    /// reasoning trace before its answer
    pub supports_thinking: Option<bool>,
}

pub struct OllamaLanguageModelProvider {
    http_client: Arc<dyn HttpClient>,
    state: gpui::Entity<State>,
}

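/// Dynamic state for the provider: the list of models fetched from the
/// server, plus a subscription that refetches whenever settings change.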
pub struct State {
    http_client: Arc<dyn HttpClient>,
    available_models: Vec<ollama::Model>,
    fetch_model_task: Option<Task<Result<()>>>,
    _subscription: Subscription,
}

impl State {
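    /// Ollama has no credentials, so a non-empty model list serves as the
    /// proxy for a reachable, "authenticated" server.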
    fn is_authenticated(&self) -> bool {
        !self.available_models.is_empty()
    }

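    /// Fetches the models installed on the Ollama server, then queries each
    /// model's capabilities (tools, vision, thinking) with bounded
    /// concurrency before publishing the sorted list.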
    fn fetch_models(&mut self, cx: &mut Context<Self>) -> Task<Result<()>> {
        let settings = &AllLanguageModelSettings::get_global(cx).ollama;
        let http_client = Arc::clone(&self.http_client);
        let api_url = settings.api_url.clone();

        // As a proxy for the server being "authenticated", check that it's up
        // by fetching the models.
        cx.spawn(async move |this, cx| {
            let models = get_models(http_client.as_ref(), &api_url, None).await?;

            let tasks = models
                .into_iter()
                // The Ollama API provides no metadata indicating which models
                // are embedding models, so simply filter out models with
                // "-embed" in their name.
                .filter(|model| !model.name.contains("-embed"))
                .map(|model| {
                    let http_client = Arc::clone(&http_client);
                    let api_url = api_url.clone();
                    async move {
                        let name = model.name.as_str();
                        let capabilities = show_model(http_client.as_ref(), &api_url, name).await?;
                        let ollama_model = ollama::Model::new(
                            name,
                            None,
                            None,
                            Some(capabilities.supports_tools()),
                            Some(capabilities.supports_vision()),
                            Some(capabilities.supports_thinking()),
                        );
                        Ok(ollama_model)
                    }
                });

            // Bound the number of concurrent capability fetches, since an
            // arbitrary number of models may be installed.
            let mut ollama_models: Vec<_> = futures::stream::iter(tasks)
                .buffer_unordered(5)
                .collect::<Vec<Result<_>>>()
                .await
                .into_iter()
                .collect::<Result<Vec<_>>>()?;

            ollama_models.sort_by(|a, b| a.name.cmp(&b.name));

            this.update(cx, |this, cx| {
                this.available_models = ollama_models;
                cx.notify();
            })
        })
    }

    fn restart_fetch_models_task(&mut self, cx: &mut Context<Self>) {
        let task = self.fetch_models(cx);
        self.fetch_model_task.replace(task);
    }

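    /// A successful model fetch doubles as authentication; if models are
    /// already loaded, the server is assumed reachable.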
    fn authenticate(&mut self, cx: &mut Context<Self>) -> Task<Result<(), AuthenticateError>> {
        if self.is_authenticated() {
            return Task::ready(Ok(()));
        }

        let fetch_models_task = self.fetch_models(cx);
        cx.spawn(async move |_this, _cx| Ok(fetch_models_task.await?))
    }
}

impl OllamaLanguageModelProvider {
    pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut App) -> Self {
        let this = Self {
            http_client: http_client.clone(),
            state: cx.new(|cx| {
                let subscription = cx.observe_global::<SettingsStore>({
                    let mut settings = AllLanguageModelSettings::get_global(cx).ollama.clone();
                    move |this: &mut State, cx| {
                        let new_settings = &AllLanguageModelSettings::get_global(cx).ollama;
                        if &settings != new_settings {
                            settings = new_settings.clone();
                            this.restart_fetch_models_task(cx);
                            cx.notify();
                        }
                    }
                });

                State {
                    http_client,
                    available_models: Default::default(),
                    fetch_model_task: None,
                    _subscription: subscription,
                }
            }),
        };
        this.state
            .update(cx, |state, cx| state.restart_fetch_models_task(cx));
        this
    }
}

impl LanguageModelProviderState for OllamaLanguageModelProvider {
    type ObservableEntity = State;

    fn observable_entity(&self) -> Option<gpui::Entity<Self::ObservableEntity>> {
        Some(self.state.clone())
    }
}

impl LanguageModelProvider for OllamaLanguageModelProvider {
    fn id(&self) -> LanguageModelProviderId {
        LanguageModelProviderId(PROVIDER_ID.into())
    }

    fn name(&self) -> LanguageModelProviderName {
        LanguageModelProviderName(PROVIDER_NAME.into())
    }

    fn icon(&self) -> IconName {
        IconName::AiOllama
    }

    fn default_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
        self.provided_models(cx).into_iter().next()
    }

    fn default_fast_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
        self.default_model(cx)
    }

    fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
        let mut models: BTreeMap<String, ollama::Model> = BTreeMap::default();

        // Add models from the Ollama API
        for model in self.state.read(cx).available_models.iter() {
            models.insert(model.name.clone(), model.clone());
        }

        // Override with available models from settings
        for model in AllLanguageModelSettings::get_global(cx)
            .ollama
            .available_models
            .iter()
        {
            models.insert(
                model.name.clone(),
                ollama::Model {
                    name: model.name.clone(),
                    display_name: model.display_name.clone(),
                    max_tokens: model.max_tokens,
                    keep_alive: model.keep_alive.clone(),
                    supports_tools: model.supports_tools,
                    supports_vision: model.supports_images,
                    supports_thinking: model.supports_thinking,
                },
            );
        }

        models
            .into_values()
            .map(|model| {
                Arc::new(OllamaLanguageModel {
                    id: LanguageModelId::from(model.name.clone()),
                    model: model.clone(),
                    http_client: self.http_client.clone(),
                    request_limiter: RateLimiter::new(4),
                }) as Arc<dyn LanguageModel>
            })
            .collect()
    }

    fn load_model(&self, model: Arc<dyn LanguageModel>, cx: &App) {
        let settings = &AllLanguageModelSettings::get_global(cx).ollama;
        let http_client = self.http_client.clone();
        let api_url = settings.api_url.clone();
        let id = model.id().0.to_string();
        cx.spawn(async move |_| preload_model(http_client, &api_url, &id).await)
            .detach_and_log_err(cx);
    }

    fn is_authenticated(&self, cx: &App) -> bool {
        self.state.read(cx).is_authenticated()
    }

    fn authenticate(&self, cx: &mut App) -> Task<Result<(), AuthenticateError>> {
        self.state.update(cx, |state, cx| state.authenticate(cx))
    }

    fn configuration_view(&self, window: &mut Window, cx: &mut App) -> AnyView {
        let state = self.state.clone();
        cx.new(|cx| ConfigurationView::new(state, window, cx))
            .into()
    }

    fn reset_credentials(&self, cx: &mut App) -> Task<Result<()>> {
        self.state.update(cx, |state, cx| state.fetch_models(cx))
    }
}

pub struct OllamaLanguageModel {
    id: LanguageModelId,
    model: ollama::Model,
    http_client: Arc<dyn HttpClient>,
    request_limiter: RateLimiter,
}

impl OllamaLanguageModel {
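    /// Converts a provider-agnostic request into Ollama's chat format:
    /// image parts are dropped unless the model supports vision, thinking
    /// content is threaded through on assistant turns, and the configured
    /// context length is passed as `num_ctx`.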
    fn to_ollama_request(&self, request: LanguageModelRequest) -> ChatRequest {
        let supports_vision = self.model.supports_vision.unwrap_or(false);

        ChatRequest {
            model: self.model.name.clone(),
            messages: request
                .messages
                .into_iter()
                .map(|msg| {
                    let images = if supports_vision {
                        msg.content
                            .iter()
                            .filter_map(|content| match content {
                                MessageContent::Image(image) => Some(image.source.to_string()),
                                _ => None,
                            })
                            .collect::<Vec<String>>()
                    } else {
                        vec![]
                    };

                    match msg.role {
                        Role::User => ChatMessage::User {
                            content: msg.string_contents(),
                            images: if images.is_empty() {
                                None
                            } else {
                                Some(images)
                            },
                        },
                        Role::Assistant => {
                            let content = msg.string_contents();
                            let thinking =
                                msg.content.into_iter().find_map(|content| match content {
                                    MessageContent::Thinking { text, .. } if !text.is_empty() => {
                                        Some(text)
                                    }
                                    _ => None,
                                });
                            ChatMessage::Assistant {
                                content,
                                tool_calls: None,
                                images: if images.is_empty() {
                                    None
                                } else {
                                    Some(images)
                                },
                                thinking,
                            }
                        }
                        Role::System => ChatMessage::System {
                            content: msg.string_contents(),
                        },
                    }
                })
                .collect(),
            keep_alive: self.model.keep_alive.clone().unwrap_or_default(),
            stream: true,
            options: Some(ChatOptions {
                num_ctx: Some(self.model.max_tokens),
                stop: Some(request.stop),
                temperature: request.temperature.or(Some(1.0)),
                ..Default::default()
            }),
            think: self.model.supports_thinking,
            tools: request.tools.into_iter().map(tool_into_ollama).collect(),
        }
    }
}

impl LanguageModel for OllamaLanguageModel {
    fn id(&self) -> LanguageModelId {
        self.id.clone()
    }

    fn name(&self) -> LanguageModelName {
        LanguageModelName::from(self.model.display_name().to_string())
    }

    fn provider_id(&self) -> LanguageModelProviderId {
        LanguageModelProviderId(PROVIDER_ID.into())
    }

    fn provider_name(&self) -> LanguageModelProviderName {
        LanguageModelProviderName(PROVIDER_NAME.into())
    }

    fn supports_tools(&self) -> bool {
        self.model.supports_tools.unwrap_or(false)
    }

    fn supports_images(&self) -> bool {
        self.model.supports_vision.unwrap_or(false)
    }

    fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
        match choice {
            LanguageModelToolChoice::Auto => false,
            LanguageModelToolChoice::Any => false,
            LanguageModelToolChoice::None => false,
        }
    }

    fn telemetry_id(&self) -> String {
        format!("ollama/{}", self.model.id())
    }

    fn max_token_count(&self) -> usize {
        self.model.max_token_count()
    }

    fn count_tokens(
        &self,
        request: LanguageModelRequest,
        _cx: &App,
    ) -> BoxFuture<'static, Result<usize>> {
        // There is no token-counting endpoint in Ollama _yet_, so estimate
        // roughly four characters per token.
        // See: https://github.com/ollama/ollama/issues/1716 and https://github.com/ollama/ollama/issues/3582
        let token_count = request
            .messages
            .iter()
            .map(|msg| msg.string_contents().chars().count())
            .sum::<usize>()
            / 4;

        async move { Ok(token_count) }.boxed()
    }

    fn stream_completion(
        &self,
        request: LanguageModelRequest,
        cx: &AsyncApp,
    ) -> BoxFuture<
        'static,
        Result<
            BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
        >,
    > {
        let request = self.to_ollama_request(request);

        let http_client = self.http_client.clone();
        let Ok(api_url) = cx.update(|cx| {
            let settings = &AllLanguageModelSettings::get_global(cx).ollama;
            settings.api_url.clone()
        }) else {
            return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
        };

        let future = self.request_limiter.stream(async move {
            let stream = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
            let stream = map_to_language_model_completion_events(stream);
            Ok(stream)
        });

        future.map_ok(|f| f.boxed()).boxed()
    }
}

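/// Adapts Ollama's streaming chat deltas to provider-agnostic completion
/// events. Tool calls get synthetic ids of the form `{name}-{counter}`,
/// since Ollama does not assign ids of its own.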
fn map_to_language_model_completion_events(
    stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
) -> impl Stream<Item = Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
    // Used for creating unique tool use ids
    static TOOL_CALL_COUNTER: AtomicU64 = AtomicU64::new(0);

    struct State {
        stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
        used_tools: bool,
    }

    // A single delta may expand into several events (e.g. a ToolUse followed
    // by a Stop), so unfold over vectors of events and flatten at the end.
    let stream = stream::unfold(
        State {
            stream,
            used_tools: false,
        },
        async move |mut state| {
            let response = state.stream.next().await?;

            let delta = match response {
                Ok(delta) => delta,
                Err(e) => {
                    let event = Err(LanguageModelCompletionError::Other(anyhow!(e)));
                    return Some((vec![event], state));
                }
            };

            let mut events = Vec::new();

            match delta.message {
                ChatMessage::User { content, images: _ } => {
                    events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                }
                ChatMessage::System { content } => {
                    events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                }
                ChatMessage::Assistant {
                    content,
                    tool_calls,
                    images: _,
                    thinking,
                } => {
                    if let Some(text) = thinking {
                        events.push(Ok(LanguageModelCompletionEvent::Thinking {
                            text,
                            signature: None,
                        }));
                    }

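                    // Only the first tool call in a delta is surfaced; any
                    // additional calls in the same delta are dropped.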
                    if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
                        match tool_call {
                            OllamaToolCall::Function(function) => {
                                let tool_id = format!(
                                    "{}-{}",
                                    &function.name,
                                    TOOL_CALL_COUNTER.fetch_add(1, Ordering::Relaxed)
                                );
                                let event =
                                    LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse {
                                        id: LanguageModelToolUseId::from(tool_id),
                                        name: Arc::from(function.name),
                                        raw_input: function.arguments.to_string(),
                                        input: function.arguments,
                                        is_input_complete: true,
                                    });
                                events.push(Ok(event));
                                state.used_tools = true;
                            }
                        }
                    } else if !content.is_empty() {
                        events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                    }
                }
            };

            if delta.done {
                if state.used_tools {
                    state.used_tools = false;
                    events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::ToolUse)));
                } else {
                    events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::EndTurn)));
                }
            }

            Some((events, state))
        },
    );

    stream.flat_map(futures::stream::iter)
}

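/// Configuration UI for the provider: shows download and model-library links
/// while disconnected, and a "Connected" indicator once models have been
/// fetched.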
struct ConfigurationView {
    state: gpui::Entity<State>,
    loading_models_task: Option<Task<()>>,
}

impl ConfigurationView {
    pub fn new(state: gpui::Entity<State>, window: &mut Window, cx: &mut Context<Self>) -> Self {
        let loading_models_task = Some(cx.spawn_in(window, {
            let state = state.clone();
            async move |this, cx| {
                if let Some(task) = state
                    .update(cx, |state, cx| state.authenticate(cx))
                    .log_err()
                {
                    task.await.log_err();
                }
                this.update(cx, |this, cx| {
                    this.loading_models_task = None;
                    cx.notify();
                })
                .log_err();
            }
        }));

        Self {
            state,
            loading_models_task,
        }
    }

    fn retry_connection(&self, cx: &mut App) {
        self.state
            .update(cx, |state, cx| state.fetch_models(cx))
            .detach_and_log_err(cx);
    }
}

impl Render for ConfigurationView {
    fn render(&mut self, _: &mut Window, cx: &mut Context<Self>) -> impl IntoElement {
        let is_authenticated = self.state.read(cx).is_authenticated();

        let ollama_intro =
            "Get up & running with Llama 3.3, Mistral, Gemma 2, and other LLMs with Ollama.";

        if self.loading_models_task.is_some() {
            div().child(Label::new("Loading models...")).into_any()
        } else {
            v_flex()
                .gap_2()
                .child(
                    v_flex().gap_1().child(Label::new(ollama_intro)).child(
                        List::new()
                            .child(InstructionListItem::text_only(
                                "Ollama must be running with at least one model installed to use it in the assistant.",
                            ))
                            .child(InstructionListItem::text_only(
                                "Once installed, try `ollama run llama3.2`",
                            )),
                    ),
                )
                .child(
                    h_flex()
                        .w_full()
                        .justify_between()
                        .gap_2()
                        .child(
                            h_flex()
                                .w_full()
                                .gap_2()
                                .map(|this| {
                                    if is_authenticated {
                                        this.child(
                                            Button::new("ollama-site", "Ollama")
                                                .style(ButtonStyle::Subtle)
                                                .icon(IconName::ArrowUpRight)
                                                .icon_size(IconSize::XSmall)
                                                .icon_color(Color::Muted)
                                                .on_click(move |_, _, cx| cx.open_url(OLLAMA_SITE))
                                                .into_any_element(),
                                        )
                                    } else {
                                        this.child(
                                            Button::new(
                                                "download_ollama_button",
                                                "Download Ollama",
                                            )
                                            .style(ButtonStyle::Subtle)
                                            .icon(IconName::ArrowUpRight)
                                            .icon_size(IconSize::XSmall)
                                            .icon_color(Color::Muted)
                                            .on_click(move |_, _, cx| {
                                                cx.open_url(OLLAMA_DOWNLOAD_URL)
                                            })
                                            .into_any_element(),
                                        )
                                    }
                                })
                                .child(
                                    Button::new("view-models", "All Models")
                                        .style(ButtonStyle::Subtle)
                                        .icon(IconName::ArrowUpRight)
                                        .icon_size(IconSize::XSmall)
                                        .icon_color(Color::Muted)
                                        .on_click(move |_, _, cx| cx.open_url(OLLAMA_LIBRARY_URL)),
                                ),
                        )
                        .map(|this| {
                            if is_authenticated {
                                this.child(
                                    ButtonLike::new("connected")
                                        .disabled(true)
                                        .cursor_style(gpui::CursorStyle::Arrow)
                                        .child(
                                            h_flex()
                                                .gap_2()
                                                .child(Indicator::dot().color(Color::Success))
                                                .child(Label::new("Connected"))
                                                .into_any_element(),
                                        ),
                                )
                            } else {
                                this.child(
                                    Button::new("retry_ollama_models", "Connect")
                                        .icon_position(IconPosition::Start)
                                        .icon_size(IconSize::XSmall)
                                        .icon(IconName::Play)
                                        .on_click(cx.listener(move |this, _, _, cx| {
                                            this.retry_connection(cx)
                                        })),
                                )
                            }
                        }),
                )
                .into_any()
        }
    }
}

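/// Maps a provider-agnostic tool definition onto Ollama's function-tool
/// format; the tool's JSON input schema is passed through unchanged. A rough
/// sketch of the resulting wire shape, assuming the usual serde serialization
/// of these types (the `get_weather` name is purely illustrative):
///
/// ```json
/// {
///   "type": "function",
///   "function": {
///     "name": "get_weather",
///     "description": "...",
///     "parameters": { "type": "object", "properties": {} }
///   }
/// }
/// ```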
fn tool_into_ollama(tool: LanguageModelRequestTool) -> ollama::OllamaTool {
    ollama::OllamaTool::Function {
        function: OllamaFunctionTool {
            name: tool.name,
            description: Some(tool.description),
            parameters: Some(tool.input_schema),
        },
    }
}