1use anyhow::{Result, anyhow};
2use futures::{FutureExt, StreamExt, future::BoxFuture, stream::BoxStream};
3use futures::{Stream, TryFutureExt, stream};
4use gpui::{AnyView, App, AsyncApp, Context, Subscription, Task};
5use http_client::HttpClient;
6use language_model::{
7 AuthenticateError, LanguageModelCompletionError, LanguageModelCompletionEvent,
8 LanguageModelRequestTool, LanguageModelToolUse, LanguageModelToolUseId, StopReason,
9};
10use language_model::{
11 LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
12 LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState,
13 LanguageModelRequest, RateLimiter, Role,
14};
15use ollama::{
16 ChatMessage, ChatOptions, ChatRequest, ChatResponseDelta, KeepAlive, OllamaFunctionTool,
17 OllamaToolCall, get_models, preload_model, show_model, stream_chat_completion,
18};
19use schemars::JsonSchema;
20use serde::{Deserialize, Serialize};
21use settings::{Settings, SettingsStore};
22use std::pin::Pin;
23use std::sync::atomic::{AtomicU64, Ordering};
24use std::{collections::BTreeMap, sync::Arc};
25use ui::{ButtonLike, Indicator, List, prelude::*};
26use util::ResultExt;
27
28use crate::AllLanguageModelSettings;
29use crate::ui::InstructionListItem;
30
/// Opened by the "Download Ollama" button in the configuration view.
const OLLAMA_DOWNLOAD_URL: &str = "https://ollama.com/download";
/// Opened by the "All Models" button in the configuration view.
const OLLAMA_LIBRARY_URL: &str = "https://ollama.com/library";
/// Opened by the "Ollama" button in the configuration view.
const OLLAMA_SITE: &str = "https://ollama.com/";

/// Value wrapped in `LanguageModelProviderId` for this provider and its models.
const PROVIDER_ID: &str = "ollama";
/// Value wrapped in `LanguageModelProviderName` for this provider and its models.
const PROVIDER_NAME: &str = "Ollama";
37
38#[derive(Default, Debug, Clone, PartialEq)]
39pub struct OllamaSettings {
40 pub api_url: String,
41 pub available_models: Vec<AvailableModel>,
42}
43
44#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
45pub struct AvailableModel {
46 /// The model name in the Ollama API (e.g. "llama3.2:latest")
47 pub name: String,
48 /// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel.
49 pub display_name: Option<String>,
50 /// The Context Length parameter to the model (aka num_ctx or n_ctx)
51 pub max_tokens: usize,
52 /// The number of seconds to keep the connection open after the last request
53 pub keep_alive: Option<KeepAlive>,
54 /// Whether the model supports tools
55 pub supports_tools: bool,
56}
57
58pub struct OllamaLanguageModelProvider {
59 http_client: Arc<dyn HttpClient>,
60 state: gpui::Entity<State>,
61}
62
63pub struct State {
64 http_client: Arc<dyn HttpClient>,
65 available_models: Vec<ollama::Model>,
66 fetch_model_task: Option<Task<Result<()>>>,
67 _subscription: Subscription,
68}
69
70impl State {
71 fn is_authenticated(&self) -> bool {
72 !self.available_models.is_empty()
73 }
74
75 fn fetch_models(&mut self, cx: &mut Context<Self>) -> Task<Result<()>> {
76 let settings = &AllLanguageModelSettings::get_global(cx).ollama;
77 let http_client = Arc::clone(&self.http_client);
78 let api_url = settings.api_url.clone();
79
80 // As a proxy for the server being "authenticated", we'll check if its up by fetching the models
81 cx.spawn(async move |this, cx| {
82 let models = get_models(http_client.as_ref(), &api_url, None).await?;
83
84 let tasks = models
85 .into_iter()
86 // Since there is no metadata from the Ollama API
87 // indicating which models are embedding models,
88 // simply filter out models with "-embed" in their name
89 .filter(|model| !model.name.contains("-embed"))
90 .map(|model| {
91 let http_client = Arc::clone(&http_client);
92 let api_url = api_url.clone();
93 async move {
94 let name = model.name.as_str();
95 let capabilities = show_model(http_client.as_ref(), &api_url, name).await?;
96 let ollama_model =
97 ollama::Model::new(name, None, None, capabilities.supports_tools());
98 Ok(ollama_model)
99 }
100 });
101
102 // Rate-limit capability fetches
103 // since there is an arbitrary number of models available
104 let mut ollama_models: Vec<_> = futures::stream::iter(tasks)
105 .buffer_unordered(5)
106 .collect::<Vec<Result<_>>>()
107 .await
108 .into_iter()
109 .collect::<Result<Vec<_>>>()?;
110
111 ollama_models.sort_by(|a, b| a.name.cmp(&b.name));
112
113 this.update(cx, |this, cx| {
114 this.available_models = ollama_models;
115 cx.notify();
116 })
117 })
118 }
119
120 fn restart_fetch_models_task(&mut self, cx: &mut Context<Self>) {
121 let task = self.fetch_models(cx);
122 self.fetch_model_task.replace(task);
123 }
124
125 fn authenticate(&mut self, cx: &mut Context<Self>) -> Task<Result<(), AuthenticateError>> {
126 if self.is_authenticated() {
127 return Task::ready(Ok(()));
128 }
129
130 let fetch_models_task = self.fetch_models(cx);
131 cx.spawn(async move |_this, _cx| Ok(fetch_models_task.await?))
132 }
133}
134
135impl OllamaLanguageModelProvider {
136 pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut App) -> Self {
137 let this = Self {
138 http_client: http_client.clone(),
139 state: cx.new(|cx| {
140 let subscription = cx.observe_global::<SettingsStore>({
141 let mut settings = AllLanguageModelSettings::get_global(cx).ollama.clone();
142 move |this: &mut State, cx| {
143 let new_settings = &AllLanguageModelSettings::get_global(cx).ollama;
144 if &settings != new_settings {
145 settings = new_settings.clone();
146 this.restart_fetch_models_task(cx);
147 cx.notify();
148 }
149 }
150 });
151
152 State {
153 http_client,
154 available_models: Default::default(),
155 fetch_model_task: None,
156 _subscription: subscription,
157 }
158 }),
159 };
160 this.state
161 .update(cx, |state, cx| state.restart_fetch_models_task(cx));
162 this
163 }
164}
165
166impl LanguageModelProviderState for OllamaLanguageModelProvider {
167 type ObservableEntity = State;
168
169 fn observable_entity(&self) -> Option<gpui::Entity<Self::ObservableEntity>> {
170 Some(self.state.clone())
171 }
172}
173
174impl LanguageModelProvider for OllamaLanguageModelProvider {
175 fn id(&self) -> LanguageModelProviderId {
176 LanguageModelProviderId(PROVIDER_ID.into())
177 }
178
179 fn name(&self) -> LanguageModelProviderName {
180 LanguageModelProviderName(PROVIDER_NAME.into())
181 }
182
183 fn icon(&self) -> IconName {
184 IconName::AiOllama
185 }
186
187 fn default_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
188 self.provided_models(cx).into_iter().next()
189 }
190
191 fn default_fast_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
192 self.default_model(cx)
193 }
194
195 fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
196 let mut models: BTreeMap<String, ollama::Model> = BTreeMap::default();
197
198 // Add models from the Ollama API
199 for model in self.state.read(cx).available_models.iter() {
200 models.insert(model.name.clone(), model.clone());
201 }
202
203 // Override with available models from settings
204 for model in AllLanguageModelSettings::get_global(cx)
205 .ollama
206 .available_models
207 .iter()
208 {
209 models.insert(
210 model.name.clone(),
211 ollama::Model {
212 name: model.name.clone(),
213 display_name: model.display_name.clone(),
214 max_tokens: model.max_tokens,
215 keep_alive: model.keep_alive.clone(),
216 supports_tools: model.supports_tools,
217 },
218 );
219 }
220
221 models
222 .into_values()
223 .map(|model| {
224 Arc::new(OllamaLanguageModel {
225 id: LanguageModelId::from(model.name.clone()),
226 model: model.clone(),
227 http_client: self.http_client.clone(),
228 request_limiter: RateLimiter::new(4),
229 }) as Arc<dyn LanguageModel>
230 })
231 .collect()
232 }
233
234 fn load_model(&self, model: Arc<dyn LanguageModel>, cx: &App) {
235 let settings = &AllLanguageModelSettings::get_global(cx).ollama;
236 let http_client = self.http_client.clone();
237 let api_url = settings.api_url.clone();
238 let id = model.id().0.to_string();
239 cx.spawn(async move |_| preload_model(http_client, &api_url, &id).await)
240 .detach_and_log_err(cx);
241 }
242
243 fn is_authenticated(&self, cx: &App) -> bool {
244 self.state.read(cx).is_authenticated()
245 }
246
247 fn authenticate(&self, cx: &mut App) -> Task<Result<(), AuthenticateError>> {
248 self.state.update(cx, |state, cx| state.authenticate(cx))
249 }
250
251 fn configuration_view(&self, window: &mut Window, cx: &mut App) -> AnyView {
252 let state = self.state.clone();
253 cx.new(|cx| ConfigurationView::new(state, window, cx))
254 .into()
255 }
256
257 fn reset_credentials(&self, cx: &mut App) -> Task<Result<()>> {
258 self.state.update(cx, |state, cx| state.fetch_models(cx))
259 }
260}
261
262pub struct OllamaLanguageModel {
263 id: LanguageModelId,
264 model: ollama::Model,
265 http_client: Arc<dyn HttpClient>,
266 request_limiter: RateLimiter,
267}
268
269impl OllamaLanguageModel {
270 fn to_ollama_request(&self, request: LanguageModelRequest) -> ChatRequest {
271 ChatRequest {
272 model: self.model.name.clone(),
273 messages: request
274 .messages
275 .into_iter()
276 .map(|msg| match msg.role {
277 Role::User => ChatMessage::User {
278 content: msg.string_contents(),
279 },
280 Role::Assistant => ChatMessage::Assistant {
281 content: msg.string_contents(),
282 tool_calls: None,
283 },
284 Role::System => ChatMessage::System {
285 content: msg.string_contents(),
286 },
287 })
288 .collect(),
289 keep_alive: self.model.keep_alive.clone().unwrap_or_default(),
290 stream: true,
291 options: Some(ChatOptions {
292 num_ctx: Some(self.model.max_tokens),
293 stop: Some(request.stop),
294 temperature: request.temperature.or(Some(1.0)),
295 ..Default::default()
296 }),
297 tools: request.tools.into_iter().map(tool_into_ollama).collect(),
298 }
299 }
300}
301
302impl LanguageModel for OllamaLanguageModel {
303 fn id(&self) -> LanguageModelId {
304 self.id.clone()
305 }
306
307 fn name(&self) -> LanguageModelName {
308 LanguageModelName::from(self.model.display_name().to_string())
309 }
310
311 fn provider_id(&self) -> LanguageModelProviderId {
312 LanguageModelProviderId(PROVIDER_ID.into())
313 }
314
315 fn provider_name(&self) -> LanguageModelProviderName {
316 LanguageModelProviderName(PROVIDER_NAME.into())
317 }
318
319 fn supports_tools(&self) -> bool {
320 self.model.supports_tools
321 }
322
323 fn telemetry_id(&self) -> String {
324 format!("ollama/{}", self.model.id())
325 }
326
327 fn max_token_count(&self) -> usize {
328 self.model.max_token_count()
329 }
330
331 fn count_tokens(
332 &self,
333 request: LanguageModelRequest,
334 _cx: &App,
335 ) -> BoxFuture<'static, Result<usize>> {
336 // There is no endpoint for this _yet_ in Ollama
337 // see: https://github.com/ollama/ollama/issues/1716 and https://github.com/ollama/ollama/issues/3582
338 let token_count = request
339 .messages
340 .iter()
341 .map(|msg| msg.string_contents().chars().count())
342 .sum::<usize>()
343 / 4;
344
345 async move { Ok(token_count) }.boxed()
346 }
347
348 fn stream_completion(
349 &self,
350 request: LanguageModelRequest,
351 cx: &AsyncApp,
352 ) -> BoxFuture<
353 'static,
354 Result<
355 BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
356 >,
357 > {
358 let request = self.to_ollama_request(request);
359
360 let http_client = self.http_client.clone();
361 let Ok(api_url) = cx.update(|cx| {
362 let settings = &AllLanguageModelSettings::get_global(cx).ollama;
363 settings.api_url.clone()
364 }) else {
365 return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
366 };
367
368 let future = self.request_limiter.stream(async move {
369 let stream = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
370 let stream = map_to_language_model_completion_events(stream);
371 Ok(stream)
372 });
373
374 future.map_ok(|f| f.boxed()).boxed()
375 }
376}
377
378fn map_to_language_model_completion_events(
379 stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
380) -> impl Stream<Item = Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
381 // Used for creating unique tool use ids
382 static TOOL_CALL_COUNTER: AtomicU64 = AtomicU64::new(0);
383
384 struct State {
385 stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
386 used_tools: bool,
387 }
388
389 // We need to create a ToolUse and Stop event from a single
390 // response from the original stream
391 let stream = stream::unfold(
392 State {
393 stream,
394 used_tools: false,
395 },
396 async move |mut state| {
397 let response = state.stream.next().await?;
398
399 let delta = match response {
400 Ok(delta) => delta,
401 Err(e) => {
402 let event = Err(LanguageModelCompletionError::Other(anyhow!(e)));
403 return Some((vec![event], state));
404 }
405 };
406
407 let mut events = Vec::new();
408
409 match delta.message {
410 ChatMessage::User { content } => {
411 events.push(Ok(LanguageModelCompletionEvent::Text(content)));
412 }
413 ChatMessage::System { content } => {
414 events.push(Ok(LanguageModelCompletionEvent::Text(content)));
415 }
416 ChatMessage::Assistant {
417 content,
418 tool_calls,
419 } => {
420 // Check for tool calls
421 if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
422 match tool_call {
423 OllamaToolCall::Function(function) => {
424 let tool_id = format!(
425 "{}-{}",
426 &function.name,
427 TOOL_CALL_COUNTER.fetch_add(1, Ordering::Relaxed)
428 );
429 let event =
430 LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse {
431 id: LanguageModelToolUseId::from(tool_id),
432 name: Arc::from(function.name),
433 raw_input: function.arguments.to_string(),
434 input: function.arguments,
435 is_input_complete: true,
436 });
437 events.push(Ok(event));
438 state.used_tools = true;
439 }
440 }
441 } else {
442 events.push(Ok(LanguageModelCompletionEvent::Text(content)));
443 }
444 }
445 };
446
447 if delta.done {
448 if state.used_tools {
449 state.used_tools = false;
450 events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::ToolUse)));
451 } else {
452 events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::EndTurn)));
453 }
454 }
455
456 Some((events, state))
457 },
458 );
459
460 stream.flat_map(futures::stream::iter)
461}
462
463struct ConfigurationView {
464 state: gpui::Entity<State>,
465 loading_models_task: Option<Task<()>>,
466}
467
468impl ConfigurationView {
469 pub fn new(state: gpui::Entity<State>, window: &mut Window, cx: &mut Context<Self>) -> Self {
470 let loading_models_task = Some(cx.spawn_in(window, {
471 let state = state.clone();
472 async move |this, cx| {
473 if let Some(task) = state
474 .update(cx, |state, cx| state.authenticate(cx))
475 .log_err()
476 {
477 task.await.log_err();
478 }
479 this.update(cx, |this, cx| {
480 this.loading_models_task = None;
481 cx.notify();
482 })
483 .log_err();
484 }
485 }));
486
487 Self {
488 state,
489 loading_models_task,
490 }
491 }
492
493 fn retry_connection(&self, cx: &mut App) {
494 self.state
495 .update(cx, |state, cx| state.fetch_models(cx))
496 .detach_and_log_err(cx);
497 }
498}
499
500impl Render for ConfigurationView {
501 fn render(&mut self, _: &mut Window, cx: &mut Context<Self>) -> impl IntoElement {
502 let is_authenticated = self.state.read(cx).is_authenticated();
503
504 let ollama_intro =
505 "Get up & running with Llama 3.3, Mistral, Gemma 2, and other LLMs with Ollama.";
506
507 if self.loading_models_task.is_some() {
508 div().child(Label::new("Loading models...")).into_any()
509 } else {
510 v_flex()
511 .gap_2()
512 .child(
513 v_flex().gap_1().child(Label::new(ollama_intro)).child(
514 List::new()
515 .child(InstructionListItem::text_only("Ollama must be running with at least one model installed to use it in the assistant."))
516 .child(InstructionListItem::text_only(
517 "Once installed, try `ollama run llama3.2`",
518 )),
519 ),
520 )
521 .child(
522 h_flex()
523 .w_full()
524 .justify_between()
525 .gap_2()
526 .child(
527 h_flex()
528 .w_full()
529 .gap_2()
530 .map(|this| {
531 if is_authenticated {
532 this.child(
533 Button::new("ollama-site", "Ollama")
534 .style(ButtonStyle::Subtle)
535 .icon(IconName::ArrowUpRight)
536 .icon_size(IconSize::XSmall)
537 .icon_color(Color::Muted)
538 .on_click(move |_, _, cx| cx.open_url(OLLAMA_SITE))
539 .into_any_element(),
540 )
541 } else {
542 this.child(
543 Button::new(
544 "download_ollama_button",
545 "Download Ollama",
546 )
547 .style(ButtonStyle::Subtle)
548 .icon(IconName::ArrowUpRight)
549 .icon_size(IconSize::XSmall)
550 .icon_color(Color::Muted)
551 .on_click(move |_, _, cx| {
552 cx.open_url(OLLAMA_DOWNLOAD_URL)
553 })
554 .into_any_element(),
555 )
556 }
557 })
558 .child(
559 Button::new("view-models", "All Models")
560 .style(ButtonStyle::Subtle)
561 .icon(IconName::ArrowUpRight)
562 .icon_size(IconSize::XSmall)
563 .icon_color(Color::Muted)
564 .on_click(move |_, _, cx| cx.open_url(OLLAMA_LIBRARY_URL)),
565 ),
566 )
567 .map(|this| {
568 if is_authenticated {
569 this.child(
570 ButtonLike::new("connected")
571 .disabled(true)
572 .cursor_style(gpui::CursorStyle::Arrow)
573 .child(
574 h_flex()
575 .gap_2()
576 .child(Indicator::dot().color(Color::Success))
577 .child(Label::new("Connected"))
578 .into_any_element(),
579 ),
580 )
581 } else {
582 this.child(
583 Button::new("retry_ollama_models", "Connect")
584 .icon_position(IconPosition::Start)
585 .icon_size(IconSize::XSmall)
586 .icon(IconName::Play)
587 .on_click(cx.listener(move |this, _, _, cx| {
588 this.retry_connection(cx)
589 })),
590 )
591 }
592 })
593 )
594 .into_any()
595 }
596 }
597}
598
599fn tool_into_ollama(tool: LanguageModelRequestTool) -> ollama::OllamaTool {
600 ollama::OllamaTool::Function {
601 function: OllamaFunctionTool {
602 name: tool.name,
603 description: Some(tool.description),
604 parameters: Some(tool.input_schema),
605 },
606 }
607}