use anyhow::{Result, anyhow};
use futures::{FutureExt, StreamExt, future::BoxFuture, stream::BoxStream};
use futures::{Stream, TryFutureExt, stream};
use gpui::{AnyView, App, AsyncApp, Context, Subscription, Task};
use http_client::HttpClient;
use language_model::{
    AuthenticateError, LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
    LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
    LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
    LanguageModelRequestTool, LanguageModelToolChoice, LanguageModelToolUse,
    LanguageModelToolUseId, MessageContent, RateLimiter, Role, StopReason, TokenUsage,
};
use ollama::{
    ChatMessage, ChatOptions, ChatRequest, ChatResponseDelta, KeepAlive, OllamaFunctionCall,
    OllamaFunctionTool, OllamaToolCall, get_models, show_model, stream_chat_completion,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::pin::Pin;
use std::sync::atomic::{AtomicU64, Ordering};
use std::{collections::HashMap, sync::Arc};
use ui::{ButtonLike, Indicator, List, prelude::*};
use util::ResultExt;

use crate::AllLanguageModelSettings;
use crate::ui::InstructionListItem;

const OLLAMA_DOWNLOAD_URL: &str = "https://ollama.com/download";
const OLLAMA_LIBRARY_URL: &str = "https://ollama.com/library";
const OLLAMA_SITE: &str = "https://ollama.com/";

const PROVIDER_ID: LanguageModelProviderId = LanguageModelProviderId::new("ollama");
const PROVIDER_NAME: LanguageModelProviderName = LanguageModelProviderName::new("Ollama");

#[derive(Default, Debug, Clone, PartialEq)]
pub struct OllamaSettings {
    pub api_url: String,
    pub available_models: Vec<AvailableModel>,
}

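// Example `settings.json` entry for a custom model (a sketch; the exact
// settings path is an assumption here, but the field names mirror
// `AvailableModel` below):
//
// "language_models": {
//   "ollama": {
//     "available_models": [
//       { "name": "llama3.2:latest", "max_tokens": 16384, "supports_tools": true }
//     ]
//   }
// }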
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
    /// The model name in the Ollama API (e.g. "llama3.2:latest")
    pub name: String,
    /// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel.
    pub display_name: Option<String>,
    /// The model's context length (the `num_ctx`/`n_ctx` parameter)
    pub max_tokens: u64,
    /// How long to keep the model loaded in memory after the last request (Ollama's `keep_alive`)
    pub keep_alive: Option<KeepAlive>,
    /// Whether the model supports tool calls
    pub supports_tools: Option<bool>,
    /// Whether the model supports vision (image inputs)
    pub supports_images: Option<bool>,
    /// Whether the model supports thinking (reasoning) output
    pub supports_thinking: Option<bool>,
}

pub struct OllamaLanguageModelProvider {
    http_client: Arc<dyn HttpClient>,
    state: gpui::Entity<State>,
}

pub struct State {
    http_client: Arc<dyn HttpClient>,
    available_models: Vec<ollama::Model>,
    fetch_model_task: Option<Task<Result<()>>>,
    _subscription: Subscription,
}

impl State {
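    // Ollama has no authentication; a non-empty model list serves as a proxy
    // for "we can reach the server".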
    fn is_authenticated(&self) -> bool {
        !self.available_models.is_empty()
    }

    fn fetch_models(&mut self, cx: &mut Context<Self>) -> Task<Result<()>> {
        let settings = &AllLanguageModelSettings::get_global(cx).ollama;
        let http_client = Arc::clone(&self.http_client);
        let api_url = settings.api_url.clone();

        // As a proxy for the server being "authenticated", we'll check if it's up by fetching the models
        cx.spawn(async move |this, cx| {
            let models = get_models(http_client.as_ref(), &api_url, None).await?;

            let tasks = models
                .into_iter()
                // Since there is no metadata from the Ollama API
                // indicating which models are embedding models,
                // simply filter out models with "-embed" in their name
                .filter(|model| !model.name.contains("-embed"))
                .map(|model| {
                    let http_client = Arc::clone(&http_client);
                    let api_url = api_url.clone();
                    async move {
                        let name = model.name.as_str();
                        let capabilities = show_model(http_client.as_ref(), &api_url, name).await?;
                        let ollama_model = ollama::Model::new(
                            name,
                            None,
                            None,
                            Some(capabilities.supports_tools()),
                            Some(capabilities.supports_vision()),
                            Some(capabilities.supports_thinking()),
                        );
                        Ok(ollama_model)
                    }
                });

            // Rate-limit capability fetches
            // since there is an arbitrary number of models available
            let mut ollama_models: Vec<_> = futures::stream::iter(tasks)
                .buffer_unordered(5)
                .collect::<Vec<Result<_>>>()
                .await
                .into_iter()
                .collect::<Result<Vec<_>>>()?;

            ollama_models.sort_by(|a, b| a.name.cmp(&b.name));

            this.update(cx, |this, cx| {
                this.available_models = ollama_models;
                cx.notify();
            })
        })
    }

    fn restart_fetch_models_task(&mut self, cx: &mut Context<Self>) {
        let task = self.fetch_models(cx);
        self.fetch_model_task.replace(task);
    }

    fn authenticate(&mut self, cx: &mut Context<Self>) -> Task<Result<(), AuthenticateError>> {
        if self.is_authenticated() {
            return Task::ready(Ok(()));
        }

        let fetch_models_task = self.fetch_models(cx);
        cx.spawn(async move |_this, _cx| Ok(fetch_models_task.await?))
    }
}

impl OllamaLanguageModelProvider {
    pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut App) -> Self {
        let this = Self {
            http_client: http_client.clone(),
            state: cx.new(|cx| {
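                // Re-fetch the model list whenever the Ollama settings
                // (e.g. `api_url`) change.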
                let subscription = cx.observe_global::<SettingsStore>({
                    let mut settings = AllLanguageModelSettings::get_global(cx).ollama.clone();
                    move |this: &mut State, cx| {
                        let new_settings = &AllLanguageModelSettings::get_global(cx).ollama;
                        if &settings != new_settings {
                            settings = new_settings.clone();
                            this.restart_fetch_models_task(cx);
                            cx.notify();
                        }
                    }
                });

                State {
                    http_client,
                    available_models: Default::default(),
                    fetch_model_task: None,
                    _subscription: subscription,
                }
            }),
        };
        this.state
            .update(cx, |state, cx| state.restart_fetch_models_task(cx));
        this
    }
}

impl LanguageModelProviderState for OllamaLanguageModelProvider {
    type ObservableEntity = State;

    fn observable_entity(&self) -> Option<gpui::Entity<Self::ObservableEntity>> {
        Some(self.state.clone())
    }
}

impl LanguageModelProvider for OllamaLanguageModelProvider {
    fn id(&self) -> LanguageModelProviderId {
        PROVIDER_ID
    }

    fn name(&self) -> LanguageModelProviderName {
        PROVIDER_NAME
    }

    fn icon(&self) -> IconName {
        IconName::AiOllama
    }

    fn default_model(&self, _: &App) -> Option<Arc<dyn LanguageModel>> {
        // We avoid selecting a default model, because doing so could trigger a
        // load of a model that isn't currently loaded. In a constrained
        // environment where the user may not have enough resources, loading
        // something by default would be bad UX.
        None
    }

    fn default_fast_model(&self, _: &App) -> Option<Arc<dyn LanguageModel>> {
        // See explanation for default_model.
        None
    }

    fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
        let mut models: HashMap<String, ollama::Model> = HashMap::new();

        // Add models from the Ollama API
        for model in self.state.read(cx).available_models.iter() {
            models.insert(model.name.clone(), model.clone());
        }

        // Override with available models from settings
        for model in AllLanguageModelSettings::get_global(cx)
            .ollama
            .available_models
            .iter()
        {
            models.insert(
                model.name.clone(),
                ollama::Model {
                    name: model.name.clone(),
                    display_name: model.display_name.clone(),
                    max_tokens: model.max_tokens,
                    keep_alive: model.keep_alive.clone(),
                    supports_tools: model.supports_tools,
                    supports_vision: model.supports_images,
                    supports_thinking: model.supports_thinking,
                },
            );
        }

        let mut models = models
            .into_values()
            .map(|model| {
                Arc::new(OllamaLanguageModel {
                    id: LanguageModelId::from(model.name.clone()),
                    model,
                    http_client: self.http_client.clone(),
                    request_limiter: RateLimiter::new(4),
                }) as Arc<dyn LanguageModel>
            })
            .collect::<Vec<_>>();
        models.sort_by_key(|model| model.name());
        models
    }

    fn is_authenticated(&self, cx: &App) -> bool {
        self.state.read(cx).is_authenticated()
    }

    fn authenticate(&self, cx: &mut App) -> Task<Result<(), AuthenticateError>> {
        self.state.update(cx, |state, cx| state.authenticate(cx))
    }

    fn configuration_view(
        &self,
        _target_agent: language_model::ConfigurationViewTargetAgent,
        window: &mut Window,
        cx: &mut App,
    ) -> AnyView {
        let state = self.state.clone();
        cx.new(|cx| ConfigurationView::new(state, window, cx))
            .into()
    }

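    // Ollama has no stored credentials; "resetting" just re-fetches the model list.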
    fn reset_credentials(&self, cx: &mut App) -> Task<Result<()>> {
        self.state.update(cx, |state, cx| state.fetch_models(cx))
    }
}

pub struct OllamaLanguageModel {
    id: LanguageModelId,
    model: ollama::Model,
    http_client: Arc<dyn HttpClient>,
    request_limiter: RateLimiter,
}

impl OllamaLanguageModel {
    fn to_ollama_request(&self, request: LanguageModelRequest) -> ChatRequest {
        let supports_vision = self.model.supports_vision.unwrap_or(false);

        let mut messages = Vec::with_capacity(request.messages.len());

        for mut msg in request.messages.into_iter() {
            let images = if supports_vision {
                msg.content
                    .iter()
                    .filter_map(|content| match content {
                        MessageContent::Image(image) => Some(image.source.to_string()),
                        _ => None,
                    })
                    .collect::<Vec<String>>()
            } else {
                vec![]
            };

            match msg.role {
                Role::User => {
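                    // Tool results are hoisted out of the user turn into
                    // separate `Tool` messages, since the Ollama chat API
                    // models them as their own role.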
                    for tool_result in msg
                        .content
                        .extract_if(.., |x| matches!(x, MessageContent::ToolResult(..)))
                    {
                        match tool_result {
                            MessageContent::ToolResult(tool_result) => {
                                messages.push(ChatMessage::Tool {
                                    tool_name: tool_result.tool_name.to_string(),
                                    content: tool_result.content.to_str().unwrap_or("").to_string(),
                                })
                            }
                            _ => unreachable!("Only tool results should be extracted"),
                        }
                    }
                    if !msg.content.is_empty() {
                        messages.push(ChatMessage::User {
                            content: msg.string_contents(),
                            images: if images.is_empty() {
                                None
                            } else {
                                Some(images)
                            },
                        })
                    }
                }
                Role::Assistant => {
                    let content = msg.string_contents();
                    let mut thinking = None;
                    let mut tool_calls = Vec::new();
                    for content in msg.content.into_iter() {
                        match content {
                            MessageContent::Thinking { text, .. } if !text.is_empty() => {
                                thinking = Some(text)
                            }
                            MessageContent::ToolUse(tool_use) => {
                                tool_calls.push(OllamaToolCall::Function(OllamaFunctionCall {
                                    name: tool_use.name.to_string(),
                                    arguments: tool_use.input,
                                }));
                            }
                            _ => (),
                        }
                    }
                    messages.push(ChatMessage::Assistant {
                        content,
                        tool_calls: Some(tool_calls),
                        images: if images.is_empty() {
                            None
                        } else {
                            Some(images)
                        },
                        thinking,
                    })
                }
                Role::System => messages.push(ChatMessage::System {
                    content: msg.string_contents(),
                }),
            }
        }
        ChatRequest {
            model: self.model.name.clone(),
            messages,
            keep_alive: self.model.keep_alive.clone().unwrap_or_default(),
            stream: true,
            options: Some(ChatOptions {
                num_ctx: Some(self.model.max_tokens),
                stop: Some(request.stop),
                temperature: request.temperature.or(Some(1.0)),
                ..Default::default()
            }),
            think: self
                .model
                .supports_thinking
                .map(|supports_thinking| supports_thinking && request.thinking_allowed),
            tools: if self.model.supports_tools.unwrap_or(false) {
                request.tools.into_iter().map(tool_into_ollama).collect()
            } else {
                vec![]
            },
        }
    }
}

impl LanguageModel for OllamaLanguageModel {
    fn id(&self) -> LanguageModelId {
        self.id.clone()
    }

    fn name(&self) -> LanguageModelName {
        LanguageModelName::from(self.model.display_name().to_string())
    }

    fn provider_id(&self) -> LanguageModelProviderId {
        PROVIDER_ID
    }

    fn provider_name(&self) -> LanguageModelProviderName {
        PROVIDER_NAME
    }

    fn supports_tools(&self) -> bool {
        self.model.supports_tools.unwrap_or(false)
    }

    fn supports_images(&self) -> bool {
        self.model.supports_vision.unwrap_or(false)
    }

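    // Tool choice (forcing or forbidding tool use) is not currently supported
    // for Ollama models, so every mode is rejected.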
    fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
        match choice {
            LanguageModelToolChoice::Auto => false,
            LanguageModelToolChoice::Any => false,
            LanguageModelToolChoice::None => false,
        }
    }

    fn telemetry_id(&self) -> String {
        format!("ollama/{}", self.model.id())
    }

    fn max_token_count(&self) -> u64 {
        self.model.max_token_count()
    }

    fn count_tokens(
        &self,
        request: LanguageModelRequest,
        _cx: &App,
    ) -> BoxFuture<'static, Result<u64>> {
        // There is no endpoint for this _yet_ in Ollama
        // see: https://github.com/ollama/ollama/issues/1716 and https://github.com/ollama/ollama/issues/3582
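        // Rough estimate instead: ~4 characters per token, a common rule of
        // thumb for English text.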
        let token_count = request
            .messages
            .iter()
            .map(|msg| msg.string_contents().chars().count())
            .sum::<usize>()
            / 4;

        async move { Ok(token_count as u64) }.boxed()
    }

    fn stream_completion(
        &self,
        request: LanguageModelRequest,
        cx: &AsyncApp,
    ) -> BoxFuture<
        'static,
        Result<
            BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
            LanguageModelCompletionError,
        >,
    > {
        let request = self.to_ollama_request(request);

        let http_client = self.http_client.clone();
        let Ok(api_url) = cx.update(|cx| {
            let settings = &AllLanguageModelSettings::get_global(cx).ollama;
            settings.api_url.clone()
        }) else {
            return futures::future::ready(Err(anyhow!("App state dropped").into())).boxed();
        };

        let future = self.request_limiter.stream(async move {
            let stream = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
            let stream = map_to_language_model_completion_events(stream);
            Ok(stream)
        });

        future.map_ok(|f| f.boxed()).boxed()
    }
}

fn map_to_language_model_completion_events(
    stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
) -> impl Stream<Item = Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
    // Used for creating unique tool use ids
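    // (the Ollama API does not return ids for tool calls, so we synthesize
    // them as "{name}-{counter}")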
    static TOOL_CALL_COUNTER: AtomicU64 = AtomicU64::new(0);

    struct State {
        stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
        used_tools: bool,
    }

    // A single delta from the original stream can produce several events
    // (e.g. a ToolUse followed by usage and Stop events), so each unfold step
    // yields a Vec of events that is flattened below.
    let stream = stream::unfold(
        State {
            stream,
            used_tools: false,
        },
        async move |mut state| {
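            // `?` propagates the end-of-stream `None`, which terminates the
            // unfold (and thus the mapped stream).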
            let response = state.stream.next().await?;

            let delta = match response {
                Ok(delta) => delta,
                Err(e) => {
                    let event = Err(LanguageModelCompletionError::from(anyhow!(e)));
                    return Some((vec![event], state));
                }
            };

            let mut events = Vec::new();

            match delta.message {
                ChatMessage::User { content, images: _ } => {
                    events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                }
                ChatMessage::System { content } => {
                    events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                }
                ChatMessage::Tool { content, .. } => {
                    events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                }
                ChatMessage::Assistant {
                    content,
                    tool_calls,
                    images: _,
                    thinking,
                } => {
                    if let Some(text) = thinking {
                        events.push(Ok(LanguageModelCompletionEvent::Thinking {
                            text,
                            signature: None,
                        }));
                    }

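                    // Note: only the first tool call in a delta is surfaced;
                    // any additional calls in the same delta are dropped.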
                    if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
                        match tool_call {
                            OllamaToolCall::Function(function) => {
                                let tool_id = format!(
                                    "{}-{}",
                                    &function.name,
                                    TOOL_CALL_COUNTER.fetch_add(1, Ordering::Relaxed)
                                );
                                let event =
                                    LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse {
                                        id: LanguageModelToolUseId::from(tool_id),
                                        name: Arc::from(function.name),
                                        raw_input: function.arguments.to_string(),
                                        input: function.arguments,
                                        is_input_complete: true,
                                    });
                                events.push(Ok(event));
                                state.used_tools = true;
                            }
                        }
                    } else if !content.is_empty() {
                        events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                    }
                }
            };

            if delta.done {
                events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(TokenUsage {
                    input_tokens: delta.prompt_eval_count.unwrap_or(0),
                    output_tokens: delta.eval_count.unwrap_or(0),
                    cache_creation_input_tokens: 0,
                    cache_read_input_tokens: 0,
                })));
                if state.used_tools {
                    state.used_tools = false;
                    events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::ToolUse)));
                } else {
                    events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::EndTurn)));
                }
            }

            Some((events, state))
        },
    );

    stream.flat_map(futures::stream::iter)
}

struct ConfigurationView {
    state: gpui::Entity<State>,
    loading_models_task: Option<Task<()>>,
}

impl ConfigurationView {
    pub fn new(state: gpui::Entity<State>, window: &mut Window, cx: &mut Context<Self>) -> Self {
        let loading_models_task = Some(cx.spawn_in(window, {
            let state = state.clone();
            async move |this, cx| {
                if let Some(task) = state
                    .update(cx, |state, cx| state.authenticate(cx))
                    .log_err()
                {
                    task.await.log_err();
                }
                this.update(cx, |this, cx| {
                    this.loading_models_task = None;
                    cx.notify();
                })
                .log_err();
            }
        }));

        Self {
            state,
            loading_models_task,
        }
    }

    fn retry_connection(&self, cx: &mut App) {
        self.state
            .update(cx, |state, cx| state.fetch_models(cx))
            .detach_and_log_err(cx);
    }
}

impl Render for ConfigurationView {
    fn render(&mut self, _: &mut Window, cx: &mut Context<Self>) -> impl IntoElement {
        let is_authenticated = self.state.read(cx).is_authenticated();

        let ollama_intro =
            "Get up & running with Llama 3.3, Mistral, Gemma 2, and other LLMs with Ollama.";

        if self.loading_models_task.is_some() {
            div().child(Label::new("Loading models...")).into_any()
        } else {
            v_flex()
                .gap_2()
                .child(
                    v_flex().gap_1().child(Label::new(ollama_intro)).child(
                        List::new()
                            .child(InstructionListItem::text_only(
                                "Ollama must be running with at least one model installed to use it in the assistant.",
                            ))
                            .child(InstructionListItem::text_only(
                                "Once installed, try `ollama run llama3.2`",
                            )),
                    ),
                )
                .child(
                    h_flex()
                        .w_full()
                        .justify_between()
                        .gap_2()
                        .child(
                            h_flex()
                                .w_full()
                                .gap_2()
                                .map(|this| {
                                    if is_authenticated {
                                        this.child(
                                            Button::new("ollama-site", "Ollama")
                                                .style(ButtonStyle::Subtle)
                                                .icon(IconName::ArrowUpRight)
                                                .icon_size(IconSize::Small)
                                                .icon_color(Color::Muted)
                                                .on_click(move |_, _, cx| cx.open_url(OLLAMA_SITE))
                                                .into_any_element(),
                                        )
                                    } else {
                                        this.child(
                                            Button::new(
                                                "download_ollama_button",
                                                "Download Ollama",
                                            )
                                            .style(ButtonStyle::Subtle)
                                            .icon(IconName::ArrowUpRight)
                                            .icon_size(IconSize::Small)
                                            .icon_color(Color::Muted)
                                            .on_click(move |_, _, cx| {
                                                cx.open_url(OLLAMA_DOWNLOAD_URL)
                                            })
                                            .into_any_element(),
                                        )
                                    }
                                })
                                .child(
                                    Button::new("view-models", "View All Models")
                                        .style(ButtonStyle::Subtle)
                                        .icon(IconName::ArrowUpRight)
                                        .icon_size(IconSize::Small)
                                        .icon_color(Color::Muted)
                                        .on_click(move |_, _, cx| cx.open_url(OLLAMA_LIBRARY_URL)),
                                ),
                        )
                        .map(|this| {
                            if is_authenticated {
                                this.child(
                                    ButtonLike::new("connected")
                                        .disabled(true)
                                        .cursor_style(gpui::CursorStyle::Arrow)
                                        .child(
                                            h_flex()
                                                .gap_2()
                                                .child(Indicator::dot().color(Color::Success))
                                                .child(Label::new("Connected"))
                                                .into_any_element(),
                                        ),
                                )
                            } else {
                                this.child(
                                    Button::new("retry_ollama_models", "Connect")
                                        .icon_position(IconPosition::Start)
                                        .icon_size(IconSize::XSmall)
                                        .icon(IconName::PlayFilled)
                                        .on_click(cx.listener(move |this, _, _, cx| {
                                            this.retry_connection(cx)
                                        })),
                                )
                            }
                        }),
                )
                .into_any()
        }
    }
}

fn tool_into_ollama(tool: LanguageModelRequestTool) -> ollama::OllamaTool {
    ollama::OllamaTool::Function {
        function: OllamaFunctionTool {
            name: tool.name,
            description: Some(tool.description),
            parameters: Some(tool.input_schema),
        },
    }
}