1use anyhow::{Result, anyhow};
2use futures::{FutureExt, StreamExt, future::BoxFuture, stream::BoxStream};
3use futures::{Stream, TryFutureExt, stream};
4use gpui::{AnyView, App, AsyncApp, Context, Subscription, Task};
5use http_client::HttpClient;
6use language_model::{
7 AuthenticateError, LanguageModelCompletionError, LanguageModelCompletionEvent,
8 LanguageModelRequestTool, LanguageModelToolUse, LanguageModelToolUseId, StopReason,
9};
10use language_model::{
11 LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
12 LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState,
13 LanguageModelRequest, RateLimiter, Role,
14};
15use ollama::{
16 ChatMessage, ChatOptions, ChatRequest, ChatResponseDelta, KeepAlive, OllamaFunctionTool,
17 OllamaToolCall, get_models, preload_model, show_model, stream_chat_completion,
18};
19use schemars::JsonSchema;
20use serde::{Deserialize, Serialize};
21use settings::{Settings, SettingsStore};
22use std::pin::Pin;
23use std::sync::atomic::{AtomicU64, Ordering};
24use std::{collections::BTreeMap, sync::Arc};
25use ui::{ButtonLike, Indicator, List, prelude::*};
26use util::ResultExt;
27
28use crate::AllLanguageModelSettings;
29use crate::ui::InstructionListItem;
30
/// Identifier used to build this provider's `LanguageModelProviderId`.
const PROVIDER_ID: &str = "ollama";
/// Name used to build this provider's `LanguageModelProviderName`.
const PROVIDER_NAME: &str = "Ollama";

/// Opened by the "Ollama" button in the configuration view.
const OLLAMA_SITE: &str = "https://ollama.com/";
/// Opened by the "Download Ollama" button in the configuration view.
const OLLAMA_DOWNLOAD_URL: &str = "https://ollama.com/download";
/// Opened by the "All Models" button in the configuration view.
const OLLAMA_LIBRARY_URL: &str = "https://ollama.com/library";
37
38#[derive(Default, Debug, Clone, PartialEq)]
39pub struct OllamaSettings {
40 pub api_url: String,
41 pub available_models: Vec<AvailableModel>,
42}
43
44#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
45pub struct AvailableModel {
46 /// The model name in the Ollama API (e.g. "llama3.2:latest")
47 pub name: String,
48 /// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel.
49 pub display_name: Option<String>,
50 /// The Context Length parameter to the model (aka num_ctx or n_ctx)
51 pub max_tokens: usize,
52 /// The number of seconds to keep the connection open after the last request
53 pub keep_alive: Option<KeepAlive>,
54 /// Whether the model supports tools
55 pub supports_tools: Option<bool>,
56}
57
58pub struct OllamaLanguageModelProvider {
59 http_client: Arc<dyn HttpClient>,
60 state: gpui::Entity<State>,
61}
62
63pub struct State {
64 http_client: Arc<dyn HttpClient>,
65 available_models: Vec<ollama::Model>,
66 fetch_model_task: Option<Task<Result<()>>>,
67 _subscription: Subscription,
68}
69
70impl State {
71 fn is_authenticated(&self) -> bool {
72 !self.available_models.is_empty()
73 }
74
75 fn fetch_models(&mut self, cx: &mut Context<Self>) -> Task<Result<()>> {
76 let settings = &AllLanguageModelSettings::get_global(cx).ollama;
77 let http_client = Arc::clone(&self.http_client);
78 let api_url = settings.api_url.clone();
79
80 // As a proxy for the server being "authenticated", we'll check if its up by fetching the models
81 cx.spawn(async move |this, cx| {
82 let models = get_models(http_client.as_ref(), &api_url, None).await?;
83
84 let tasks = models
85 .into_iter()
86 // Since there is no metadata from the Ollama API
87 // indicating which models are embedding models,
88 // simply filter out models with "-embed" in their name
89 .filter(|model| !model.name.contains("-embed"))
90 .map(|model| {
91 let http_client = Arc::clone(&http_client);
92 let api_url = api_url.clone();
93 async move {
94 let name = model.name.as_str();
95 let capabilities = show_model(http_client.as_ref(), &api_url, name).await?;
96 let ollama_model = ollama::Model::new(
97 name,
98 None,
99 None,
100 Some(capabilities.supports_tools()),
101 );
102 Ok(ollama_model)
103 }
104 });
105
106 // Rate-limit capability fetches
107 // since there is an arbitrary number of models available
108 let mut ollama_models: Vec<_> = futures::stream::iter(tasks)
109 .buffer_unordered(5)
110 .collect::<Vec<Result<_>>>()
111 .await
112 .into_iter()
113 .collect::<Result<Vec<_>>>()?;
114
115 ollama_models.sort_by(|a, b| a.name.cmp(&b.name));
116
117 this.update(cx, |this, cx| {
118 this.available_models = ollama_models;
119 cx.notify();
120 })
121 })
122 }
123
124 fn restart_fetch_models_task(&mut self, cx: &mut Context<Self>) {
125 let task = self.fetch_models(cx);
126 self.fetch_model_task.replace(task);
127 }
128
129 fn authenticate(&mut self, cx: &mut Context<Self>) -> Task<Result<(), AuthenticateError>> {
130 if self.is_authenticated() {
131 return Task::ready(Ok(()));
132 }
133
134 let fetch_models_task = self.fetch_models(cx);
135 cx.spawn(async move |_this, _cx| Ok(fetch_models_task.await?))
136 }
137}
138
139impl OllamaLanguageModelProvider {
140 pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut App) -> Self {
141 let this = Self {
142 http_client: http_client.clone(),
143 state: cx.new(|cx| {
144 let subscription = cx.observe_global::<SettingsStore>({
145 let mut settings = AllLanguageModelSettings::get_global(cx).ollama.clone();
146 move |this: &mut State, cx| {
147 let new_settings = &AllLanguageModelSettings::get_global(cx).ollama;
148 if &settings != new_settings {
149 settings = new_settings.clone();
150 this.restart_fetch_models_task(cx);
151 cx.notify();
152 }
153 }
154 });
155
156 State {
157 http_client,
158 available_models: Default::default(),
159 fetch_model_task: None,
160 _subscription: subscription,
161 }
162 }),
163 };
164 this.state
165 .update(cx, |state, cx| state.restart_fetch_models_task(cx));
166 this
167 }
168}
169
170impl LanguageModelProviderState for OllamaLanguageModelProvider {
171 type ObservableEntity = State;
172
173 fn observable_entity(&self) -> Option<gpui::Entity<Self::ObservableEntity>> {
174 Some(self.state.clone())
175 }
176}
177
178impl LanguageModelProvider for OllamaLanguageModelProvider {
179 fn id(&self) -> LanguageModelProviderId {
180 LanguageModelProviderId(PROVIDER_ID.into())
181 }
182
183 fn name(&self) -> LanguageModelProviderName {
184 LanguageModelProviderName(PROVIDER_NAME.into())
185 }
186
187 fn icon(&self) -> IconName {
188 IconName::AiOllama
189 }
190
191 fn default_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
192 self.provided_models(cx).into_iter().next()
193 }
194
195 fn default_fast_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
196 self.default_model(cx)
197 }
198
199 fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
200 let mut models: BTreeMap<String, ollama::Model> = BTreeMap::default();
201
202 // Add models from the Ollama API
203 for model in self.state.read(cx).available_models.iter() {
204 models.insert(model.name.clone(), model.clone());
205 }
206
207 // Override with available models from settings
208 for model in AllLanguageModelSettings::get_global(cx)
209 .ollama
210 .available_models
211 .iter()
212 {
213 models.insert(
214 model.name.clone(),
215 ollama::Model {
216 name: model.name.clone(),
217 display_name: model.display_name.clone(),
218 max_tokens: model.max_tokens,
219 keep_alive: model.keep_alive.clone(),
220 supports_tools: model.supports_tools,
221 },
222 );
223 }
224
225 models
226 .into_values()
227 .map(|model| {
228 Arc::new(OllamaLanguageModel {
229 id: LanguageModelId::from(model.name.clone()),
230 model: model.clone(),
231 http_client: self.http_client.clone(),
232 request_limiter: RateLimiter::new(4),
233 }) as Arc<dyn LanguageModel>
234 })
235 .collect()
236 }
237
238 fn load_model(&self, model: Arc<dyn LanguageModel>, cx: &App) {
239 let settings = &AllLanguageModelSettings::get_global(cx).ollama;
240 let http_client = self.http_client.clone();
241 let api_url = settings.api_url.clone();
242 let id = model.id().0.to_string();
243 cx.spawn(async move |_| preload_model(http_client, &api_url, &id).await)
244 .detach_and_log_err(cx);
245 }
246
247 fn is_authenticated(&self, cx: &App) -> bool {
248 self.state.read(cx).is_authenticated()
249 }
250
251 fn authenticate(&self, cx: &mut App) -> Task<Result<(), AuthenticateError>> {
252 self.state.update(cx, |state, cx| state.authenticate(cx))
253 }
254
255 fn configuration_view(&self, window: &mut Window, cx: &mut App) -> AnyView {
256 let state = self.state.clone();
257 cx.new(|cx| ConfigurationView::new(state, window, cx))
258 .into()
259 }
260
261 fn reset_credentials(&self, cx: &mut App) -> Task<Result<()>> {
262 self.state.update(cx, |state, cx| state.fetch_models(cx))
263 }
264}
265
266pub struct OllamaLanguageModel {
267 id: LanguageModelId,
268 model: ollama::Model,
269 http_client: Arc<dyn HttpClient>,
270 request_limiter: RateLimiter,
271}
272
273impl OllamaLanguageModel {
274 fn to_ollama_request(&self, request: LanguageModelRequest) -> ChatRequest {
275 ChatRequest {
276 model: self.model.name.clone(),
277 messages: request
278 .messages
279 .into_iter()
280 .map(|msg| match msg.role {
281 Role::User => ChatMessage::User {
282 content: msg.string_contents(),
283 },
284 Role::Assistant => ChatMessage::Assistant {
285 content: msg.string_contents(),
286 tool_calls: None,
287 },
288 Role::System => ChatMessage::System {
289 content: msg.string_contents(),
290 },
291 })
292 .collect(),
293 keep_alive: self.model.keep_alive.clone().unwrap_or_default(),
294 stream: true,
295 options: Some(ChatOptions {
296 num_ctx: Some(self.model.max_tokens),
297 stop: Some(request.stop),
298 temperature: request.temperature.or(Some(1.0)),
299 ..Default::default()
300 }),
301 tools: request.tools.into_iter().map(tool_into_ollama).collect(),
302 }
303 }
304}
305
306impl LanguageModel for OllamaLanguageModel {
307 fn id(&self) -> LanguageModelId {
308 self.id.clone()
309 }
310
311 fn name(&self) -> LanguageModelName {
312 LanguageModelName::from(self.model.display_name().to_string())
313 }
314
315 fn provider_id(&self) -> LanguageModelProviderId {
316 LanguageModelProviderId(PROVIDER_ID.into())
317 }
318
319 fn provider_name(&self) -> LanguageModelProviderName {
320 LanguageModelProviderName(PROVIDER_NAME.into())
321 }
322
323 fn supports_tools(&self) -> bool {
324 self.model.supports_tools.unwrap_or(false)
325 }
326
327 fn telemetry_id(&self) -> String {
328 format!("ollama/{}", self.model.id())
329 }
330
331 fn max_token_count(&self) -> usize {
332 self.model.max_token_count()
333 }
334
335 fn count_tokens(
336 &self,
337 request: LanguageModelRequest,
338 _cx: &App,
339 ) -> BoxFuture<'static, Result<usize>> {
340 // There is no endpoint for this _yet_ in Ollama
341 // see: https://github.com/ollama/ollama/issues/1716 and https://github.com/ollama/ollama/issues/3582
342 let token_count = request
343 .messages
344 .iter()
345 .map(|msg| msg.string_contents().chars().count())
346 .sum::<usize>()
347 / 4;
348
349 async move { Ok(token_count) }.boxed()
350 }
351
352 fn stream_completion(
353 &self,
354 request: LanguageModelRequest,
355 cx: &AsyncApp,
356 ) -> BoxFuture<
357 'static,
358 Result<
359 BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
360 >,
361 > {
362 let request = self.to_ollama_request(request);
363
364 let http_client = self.http_client.clone();
365 let Ok(api_url) = cx.update(|cx| {
366 let settings = &AllLanguageModelSettings::get_global(cx).ollama;
367 settings.api_url.clone()
368 }) else {
369 return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
370 };
371
372 let future = self.request_limiter.stream(async move {
373 let stream = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
374 let stream = map_to_language_model_completion_events(stream);
375 Ok(stream)
376 });
377
378 future.map_ok(|f| f.boxed()).boxed()
379 }
380}
381
382fn map_to_language_model_completion_events(
383 stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
384) -> impl Stream<Item = Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
385 // Used for creating unique tool use ids
386 static TOOL_CALL_COUNTER: AtomicU64 = AtomicU64::new(0);
387
388 struct State {
389 stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
390 used_tools: bool,
391 }
392
393 // We need to create a ToolUse and Stop event from a single
394 // response from the original stream
395 let stream = stream::unfold(
396 State {
397 stream,
398 used_tools: false,
399 },
400 async move |mut state| {
401 let response = state.stream.next().await?;
402
403 let delta = match response {
404 Ok(delta) => delta,
405 Err(e) => {
406 let event = Err(LanguageModelCompletionError::Other(anyhow!(e)));
407 return Some((vec![event], state));
408 }
409 };
410
411 let mut events = Vec::new();
412
413 match delta.message {
414 ChatMessage::User { content } => {
415 events.push(Ok(LanguageModelCompletionEvent::Text(content)));
416 }
417 ChatMessage::System { content } => {
418 events.push(Ok(LanguageModelCompletionEvent::Text(content)));
419 }
420 ChatMessage::Assistant {
421 content,
422 tool_calls,
423 } => {
424 // Check for tool calls
425 if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
426 match tool_call {
427 OllamaToolCall::Function(function) => {
428 let tool_id = format!(
429 "{}-{}",
430 &function.name,
431 TOOL_CALL_COUNTER.fetch_add(1, Ordering::Relaxed)
432 );
433 let event =
434 LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse {
435 id: LanguageModelToolUseId::from(tool_id),
436 name: Arc::from(function.name),
437 raw_input: function.arguments.to_string(),
438 input: function.arguments,
439 is_input_complete: true,
440 });
441 events.push(Ok(event));
442 state.used_tools = true;
443 }
444 }
445 } else {
446 events.push(Ok(LanguageModelCompletionEvent::Text(content)));
447 }
448 }
449 };
450
451 if delta.done {
452 if state.used_tools {
453 state.used_tools = false;
454 events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::ToolUse)));
455 } else {
456 events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::EndTurn)));
457 }
458 }
459
460 Some((events, state))
461 },
462 );
463
464 stream.flat_map(futures::stream::iter)
465}
466
467struct ConfigurationView {
468 state: gpui::Entity<State>,
469 loading_models_task: Option<Task<()>>,
470}
471
472impl ConfigurationView {
473 pub fn new(state: gpui::Entity<State>, window: &mut Window, cx: &mut Context<Self>) -> Self {
474 let loading_models_task = Some(cx.spawn_in(window, {
475 let state = state.clone();
476 async move |this, cx| {
477 if let Some(task) = state
478 .update(cx, |state, cx| state.authenticate(cx))
479 .log_err()
480 {
481 task.await.log_err();
482 }
483 this.update(cx, |this, cx| {
484 this.loading_models_task = None;
485 cx.notify();
486 })
487 .log_err();
488 }
489 }));
490
491 Self {
492 state,
493 loading_models_task,
494 }
495 }
496
497 fn retry_connection(&self, cx: &mut App) {
498 self.state
499 .update(cx, |state, cx| state.fetch_models(cx))
500 .detach_and_log_err(cx);
501 }
502}
503
504impl Render for ConfigurationView {
505 fn render(&mut self, _: &mut Window, cx: &mut Context<Self>) -> impl IntoElement {
506 let is_authenticated = self.state.read(cx).is_authenticated();
507
508 let ollama_intro =
509 "Get up & running with Llama 3.3, Mistral, Gemma 2, and other LLMs with Ollama.";
510
511 if self.loading_models_task.is_some() {
512 div().child(Label::new("Loading models...")).into_any()
513 } else {
514 v_flex()
515 .gap_2()
516 .child(
517 v_flex().gap_1().child(Label::new(ollama_intro)).child(
518 List::new()
519 .child(InstructionListItem::text_only("Ollama must be running with at least one model installed to use it in the assistant."))
520 .child(InstructionListItem::text_only(
521 "Once installed, try `ollama run llama3.2`",
522 )),
523 ),
524 )
525 .child(
526 h_flex()
527 .w_full()
528 .justify_between()
529 .gap_2()
530 .child(
531 h_flex()
532 .w_full()
533 .gap_2()
534 .map(|this| {
535 if is_authenticated {
536 this.child(
537 Button::new("ollama-site", "Ollama")
538 .style(ButtonStyle::Subtle)
539 .icon(IconName::ArrowUpRight)
540 .icon_size(IconSize::XSmall)
541 .icon_color(Color::Muted)
542 .on_click(move |_, _, cx| cx.open_url(OLLAMA_SITE))
543 .into_any_element(),
544 )
545 } else {
546 this.child(
547 Button::new(
548 "download_ollama_button",
549 "Download Ollama",
550 )
551 .style(ButtonStyle::Subtle)
552 .icon(IconName::ArrowUpRight)
553 .icon_size(IconSize::XSmall)
554 .icon_color(Color::Muted)
555 .on_click(move |_, _, cx| {
556 cx.open_url(OLLAMA_DOWNLOAD_URL)
557 })
558 .into_any_element(),
559 )
560 }
561 })
562 .child(
563 Button::new("view-models", "All Models")
564 .style(ButtonStyle::Subtle)
565 .icon(IconName::ArrowUpRight)
566 .icon_size(IconSize::XSmall)
567 .icon_color(Color::Muted)
568 .on_click(move |_, _, cx| cx.open_url(OLLAMA_LIBRARY_URL)),
569 ),
570 )
571 .map(|this| {
572 if is_authenticated {
573 this.child(
574 ButtonLike::new("connected")
575 .disabled(true)
576 .cursor_style(gpui::CursorStyle::Arrow)
577 .child(
578 h_flex()
579 .gap_2()
580 .child(Indicator::dot().color(Color::Success))
581 .child(Label::new("Connected"))
582 .into_any_element(),
583 ),
584 )
585 } else {
586 this.child(
587 Button::new("retry_ollama_models", "Connect")
588 .icon_position(IconPosition::Start)
589 .icon_size(IconSize::XSmall)
590 .icon(IconName::Play)
591 .on_click(cx.listener(move |this, _, _, cx| {
592 this.retry_connection(cx)
593 })),
594 )
595 }
596 })
597 )
598 .into_any()
599 }
600 }
601}
602
603fn tool_into_ollama(tool: LanguageModelRequestTool) -> ollama::OllamaTool {
604 ollama::OllamaTool::Function {
605 function: OllamaFunctionTool {
606 name: tool.name,
607 description: Some(tool.description),
608 parameters: Some(tool.input_schema),
609 },
610 }
611}