1use anyhow::{Result, anyhow};
2use futures::{FutureExt, StreamExt, future::BoxFuture, stream::BoxStream};
3use futures::{Stream, TryFutureExt, stream};
4use gpui::{AnyView, App, AsyncApp, Context, Subscription, Task};
5use http_client::HttpClient;
6use language_model::{
7 AuthenticateError, LanguageModelCompletionError, LanguageModelCompletionEvent,
8 LanguageModelRequestTool, LanguageModelToolChoice, LanguageModelToolUse,
9 LanguageModelToolUseId, StopReason,
10};
11use language_model::{
12 LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
13 LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState,
14 LanguageModelRequest, RateLimiter, Role,
15};
16use ollama::{
17 ChatMessage, ChatOptions, ChatRequest, ChatResponseDelta, KeepAlive, OllamaFunctionTool,
18 OllamaToolCall, get_models, preload_model, show_model, stream_chat_completion,
19};
20use schemars::JsonSchema;
21use serde::{Deserialize, Serialize};
22use settings::{Settings, SettingsStore};
23use std::pin::Pin;
24use std::sync::atomic::{AtomicU64, Ordering};
25use std::{collections::BTreeMap, sync::Arc};
26use ui::{ButtonLike, Indicator, List, prelude::*};
27use util::ResultExt;
28
29use crate::AllLanguageModelSettings;
30use crate::ui::InstructionListItem;
31
/// Page opened by the "Download Ollama" button in the configuration view.
const OLLAMA_DOWNLOAD_URL: &str = "https://ollama.com/download";
/// Page opened by the "All Models" button in the configuration view.
const OLLAMA_LIBRARY_URL: &str = "https://ollama.com/library";
/// Page opened by the "Ollama" button shown once the provider is connected.
const OLLAMA_SITE: &str = "https://ollama.com/";

/// Value wrapped in `LanguageModelProviderId` by the provider and its models.
const PROVIDER_ID: &str = "ollama";
/// Value wrapped in `LanguageModelProviderName` by the provider and its models.
const PROVIDER_NAME: &str = "Ollama";
38
/// Settings for the Ollama provider.
#[derive(Default, Debug, Clone, PartialEq)]
pub struct OllamaSettings {
    /// URL handed to the Ollama API helpers (`get_models`, `show_model`,
    /// `preload_model`, `stream_chat_completion`).
    pub api_url: String,
    /// Models declared in settings. In `provided_models` these replace any
    /// fetched model that has the same name.
    pub available_models: Vec<AvailableModel>,
}
44
/// A model declared in the user's settings for the Ollama provider.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
    /// The model name in the Ollama API (e.g. "llama3.2:latest")
    pub name: String,
    /// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel.
    pub display_name: Option<String>,
    // Sent as `num_ctx` in the chat request options.
    /// The Context Length parameter to the model (aka num_ctx or n_ctx)
    pub max_tokens: usize,
    // Forwarded as the `keep_alive` field of each chat request.
    // NOTE(review): the Ollama API describes `keep_alive` as how long the model
    // stays loaded after a request rather than a connection lifetime — confirm
    // and reword the doc comment below if so.
    /// The number of seconds to keep the connection open after the last request
    pub keep_alive: Option<KeepAlive>,
    // Treated as `false` when unset (see `OllamaLanguageModel::supports_tools`).
    /// Whether the model supports tools
    pub supports_tools: Option<bool>,
}
58
/// Language model provider that talks to an Ollama server.
pub struct OllamaLanguageModelProvider {
    /// HTTP client cloned into every `OllamaLanguageModel` this provider creates
    /// and used when preloading a model.
    http_client: Arc<dyn HttpClient>,
    /// Entity holding the fetched model list; also returned as the provider's
    /// observable entity.
    state: gpui::Entity<State>,
}
63
/// Provider state: the models most recently fetched from the server and the
/// bookkeeping needed to refresh them.
pub struct State {
    /// HTTP client used for the model-listing and capability requests.
    http_client: Arc<dyn HttpClient>,
    /// Models stored by the last successful `fetch_models` run, sorted by name.
    /// A non-empty list is what `is_authenticated` reports as "authenticated".
    available_models: Vec<ollama::Model>,
    /// Task started by the most recent `restart_fetch_models_task` call.
    /// NOTE(review): presumably held so the fetch is not cancelled when the
    /// handle would otherwise be dropped — confirm against gpui's `Task` semantics.
    fetch_model_task: Option<Task<Result<()>>>,
    /// Observer of the global `SettingsStore`, created in
    /// `OllamaLanguageModelProvider::new`; stored here for the lifetime of the state.
    _subscription: Subscription,
}
70
71impl State {
72 fn is_authenticated(&self) -> bool {
73 !self.available_models.is_empty()
74 }
75
76 fn fetch_models(&mut self, cx: &mut Context<Self>) -> Task<Result<()>> {
77 let settings = &AllLanguageModelSettings::get_global(cx).ollama;
78 let http_client = Arc::clone(&self.http_client);
79 let api_url = settings.api_url.clone();
80
81 // As a proxy for the server being "authenticated", we'll check if its up by fetching the models
82 cx.spawn(async move |this, cx| {
83 let models = get_models(http_client.as_ref(), &api_url, None).await?;
84
85 let tasks = models
86 .into_iter()
87 // Since there is no metadata from the Ollama API
88 // indicating which models are embedding models,
89 // simply filter out models with "-embed" in their name
90 .filter(|model| !model.name.contains("-embed"))
91 .map(|model| {
92 let http_client = Arc::clone(&http_client);
93 let api_url = api_url.clone();
94 async move {
95 let name = model.name.as_str();
96 let capabilities = show_model(http_client.as_ref(), &api_url, name).await?;
97 let ollama_model = ollama::Model::new(
98 name,
99 None,
100 None,
101 Some(capabilities.supports_tools()),
102 );
103 Ok(ollama_model)
104 }
105 });
106
107 // Rate-limit capability fetches
108 // since there is an arbitrary number of models available
109 let mut ollama_models: Vec<_> = futures::stream::iter(tasks)
110 .buffer_unordered(5)
111 .collect::<Vec<Result<_>>>()
112 .await
113 .into_iter()
114 .collect::<Result<Vec<_>>>()?;
115
116 ollama_models.sort_by(|a, b| a.name.cmp(&b.name));
117
118 this.update(cx, |this, cx| {
119 this.available_models = ollama_models;
120 cx.notify();
121 })
122 })
123 }
124
125 fn restart_fetch_models_task(&mut self, cx: &mut Context<Self>) {
126 let task = self.fetch_models(cx);
127 self.fetch_model_task.replace(task);
128 }
129
130 fn authenticate(&mut self, cx: &mut Context<Self>) -> Task<Result<(), AuthenticateError>> {
131 if self.is_authenticated() {
132 return Task::ready(Ok(()));
133 }
134
135 let fetch_models_task = self.fetch_models(cx);
136 cx.spawn(async move |_this, _cx| Ok(fetch_models_task.await?))
137 }
138}
139
140impl OllamaLanguageModelProvider {
141 pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut App) -> Self {
142 let this = Self {
143 http_client: http_client.clone(),
144 state: cx.new(|cx| {
145 let subscription = cx.observe_global::<SettingsStore>({
146 let mut settings = AllLanguageModelSettings::get_global(cx).ollama.clone();
147 move |this: &mut State, cx| {
148 let new_settings = &AllLanguageModelSettings::get_global(cx).ollama;
149 if &settings != new_settings {
150 settings = new_settings.clone();
151 this.restart_fetch_models_task(cx);
152 cx.notify();
153 }
154 }
155 });
156
157 State {
158 http_client,
159 available_models: Default::default(),
160 fetch_model_task: None,
161 _subscription: subscription,
162 }
163 }),
164 };
165 this.state
166 .update(cx, |state, cx| state.restart_fetch_models_task(cx));
167 this
168 }
169}
170
171impl LanguageModelProviderState for OllamaLanguageModelProvider {
172 type ObservableEntity = State;
173
174 fn observable_entity(&self) -> Option<gpui::Entity<Self::ObservableEntity>> {
175 Some(self.state.clone())
176 }
177}
178
179impl LanguageModelProvider for OllamaLanguageModelProvider {
180 fn id(&self) -> LanguageModelProviderId {
181 LanguageModelProviderId(PROVIDER_ID.into())
182 }
183
184 fn name(&self) -> LanguageModelProviderName {
185 LanguageModelProviderName(PROVIDER_NAME.into())
186 }
187
188 fn icon(&self) -> IconName {
189 IconName::AiOllama
190 }
191
192 fn default_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
193 self.provided_models(cx).into_iter().next()
194 }
195
196 fn default_fast_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
197 self.default_model(cx)
198 }
199
200 fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
201 let mut models: BTreeMap<String, ollama::Model> = BTreeMap::default();
202
203 // Add models from the Ollama API
204 for model in self.state.read(cx).available_models.iter() {
205 models.insert(model.name.clone(), model.clone());
206 }
207
208 // Override with available models from settings
209 for model in AllLanguageModelSettings::get_global(cx)
210 .ollama
211 .available_models
212 .iter()
213 {
214 models.insert(
215 model.name.clone(),
216 ollama::Model {
217 name: model.name.clone(),
218 display_name: model.display_name.clone(),
219 max_tokens: model.max_tokens,
220 keep_alive: model.keep_alive.clone(),
221 supports_tools: model.supports_tools,
222 },
223 );
224 }
225
226 models
227 .into_values()
228 .map(|model| {
229 Arc::new(OllamaLanguageModel {
230 id: LanguageModelId::from(model.name.clone()),
231 model: model.clone(),
232 http_client: self.http_client.clone(),
233 request_limiter: RateLimiter::new(4),
234 }) as Arc<dyn LanguageModel>
235 })
236 .collect()
237 }
238
239 fn load_model(&self, model: Arc<dyn LanguageModel>, cx: &App) {
240 let settings = &AllLanguageModelSettings::get_global(cx).ollama;
241 let http_client = self.http_client.clone();
242 let api_url = settings.api_url.clone();
243 let id = model.id().0.to_string();
244 cx.spawn(async move |_| preload_model(http_client, &api_url, &id).await)
245 .detach_and_log_err(cx);
246 }
247
248 fn is_authenticated(&self, cx: &App) -> bool {
249 self.state.read(cx).is_authenticated()
250 }
251
252 fn authenticate(&self, cx: &mut App) -> Task<Result<(), AuthenticateError>> {
253 self.state.update(cx, |state, cx| state.authenticate(cx))
254 }
255
256 fn configuration_view(&self, window: &mut Window, cx: &mut App) -> AnyView {
257 let state = self.state.clone();
258 cx.new(|cx| ConfigurationView::new(state, window, cx))
259 .into()
260 }
261
262 fn reset_credentials(&self, cx: &mut App) -> Task<Result<()>> {
263 self.state.update(cx, |state, cx| state.fetch_models(cx))
264 }
265}
266
/// A single Ollama model exposed through the `LanguageModel` trait.
pub struct OllamaLanguageModel {
    /// Identifier built from the model's name in `provided_models`.
    id: LanguageModelId,
    /// Model description (name, display name, context size, keep-alive, tool support).
    model: ollama::Model,
    /// HTTP client used to stream chat completions.
    http_client: Arc<dyn HttpClient>,
    /// Limiter that completion requests are routed through; created with a
    /// limit of 4 in `provided_models`.
    request_limiter: RateLimiter,
}
273
274impl OllamaLanguageModel {
275 fn to_ollama_request(&self, request: LanguageModelRequest) -> ChatRequest {
276 ChatRequest {
277 model: self.model.name.clone(),
278 messages: request
279 .messages
280 .into_iter()
281 .map(|msg| match msg.role {
282 Role::User => ChatMessage::User {
283 content: msg.string_contents(),
284 },
285 Role::Assistant => ChatMessage::Assistant {
286 content: msg.string_contents(),
287 tool_calls: None,
288 },
289 Role::System => ChatMessage::System {
290 content: msg.string_contents(),
291 },
292 })
293 .collect(),
294 keep_alive: self.model.keep_alive.clone().unwrap_or_default(),
295 stream: true,
296 options: Some(ChatOptions {
297 num_ctx: Some(self.model.max_tokens),
298 stop: Some(request.stop),
299 temperature: request.temperature.or(Some(1.0)),
300 ..Default::default()
301 }),
302 tools: request.tools.into_iter().map(tool_into_ollama).collect(),
303 }
304 }
305}
306
307impl LanguageModel for OllamaLanguageModel {
308 fn id(&self) -> LanguageModelId {
309 self.id.clone()
310 }
311
312 fn name(&self) -> LanguageModelName {
313 LanguageModelName::from(self.model.display_name().to_string())
314 }
315
316 fn provider_id(&self) -> LanguageModelProviderId {
317 LanguageModelProviderId(PROVIDER_ID.into())
318 }
319
320 fn provider_name(&self) -> LanguageModelProviderName {
321 LanguageModelProviderName(PROVIDER_NAME.into())
322 }
323
324 fn supports_tools(&self) -> bool {
325 self.model.supports_tools.unwrap_or(false)
326 }
327
328 fn supports_images(&self) -> bool {
329 false
330 }
331
332 fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
333 match choice {
334 LanguageModelToolChoice::Auto => false,
335 LanguageModelToolChoice::Any => false,
336 LanguageModelToolChoice::None => false,
337 }
338 }
339
340 fn telemetry_id(&self) -> String {
341 format!("ollama/{}", self.model.id())
342 }
343
344 fn max_token_count(&self) -> usize {
345 self.model.max_token_count()
346 }
347
348 fn count_tokens(
349 &self,
350 request: LanguageModelRequest,
351 _cx: &App,
352 ) -> BoxFuture<'static, Result<usize>> {
353 // There is no endpoint for this _yet_ in Ollama
354 // see: https://github.com/ollama/ollama/issues/1716 and https://github.com/ollama/ollama/issues/3582
355 let token_count = request
356 .messages
357 .iter()
358 .map(|msg| msg.string_contents().chars().count())
359 .sum::<usize>()
360 / 4;
361
362 async move { Ok(token_count) }.boxed()
363 }
364
365 fn stream_completion(
366 &self,
367 request: LanguageModelRequest,
368 cx: &AsyncApp,
369 ) -> BoxFuture<
370 'static,
371 Result<
372 BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
373 >,
374 > {
375 let request = self.to_ollama_request(request);
376
377 let http_client = self.http_client.clone();
378 let Ok(api_url) = cx.update(|cx| {
379 let settings = &AllLanguageModelSettings::get_global(cx).ollama;
380 settings.api_url.clone()
381 }) else {
382 return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
383 };
384
385 let future = self.request_limiter.stream(async move {
386 let stream = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
387 let stream = map_to_language_model_completion_events(stream);
388 Ok(stream)
389 });
390
391 future.map_ok(|f| f.boxed()).boxed()
392 }
393}
394
395fn map_to_language_model_completion_events(
396 stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
397) -> impl Stream<Item = Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
398 // Used for creating unique tool use ids
399 static TOOL_CALL_COUNTER: AtomicU64 = AtomicU64::new(0);
400
401 struct State {
402 stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
403 used_tools: bool,
404 }
405
406 // We need to create a ToolUse and Stop event from a single
407 // response from the original stream
408 let stream = stream::unfold(
409 State {
410 stream,
411 used_tools: false,
412 },
413 async move |mut state| {
414 let response = state.stream.next().await?;
415
416 let delta = match response {
417 Ok(delta) => delta,
418 Err(e) => {
419 let event = Err(LanguageModelCompletionError::Other(anyhow!(e)));
420 return Some((vec![event], state));
421 }
422 };
423
424 let mut events = Vec::new();
425
426 match delta.message {
427 ChatMessage::User { content } => {
428 events.push(Ok(LanguageModelCompletionEvent::Text(content)));
429 }
430 ChatMessage::System { content } => {
431 events.push(Ok(LanguageModelCompletionEvent::Text(content)));
432 }
433 ChatMessage::Assistant {
434 content,
435 tool_calls,
436 } => {
437 // Check for tool calls
438 if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
439 match tool_call {
440 OllamaToolCall::Function(function) => {
441 let tool_id = format!(
442 "{}-{}",
443 &function.name,
444 TOOL_CALL_COUNTER.fetch_add(1, Ordering::Relaxed)
445 );
446 let event =
447 LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse {
448 id: LanguageModelToolUseId::from(tool_id),
449 name: Arc::from(function.name),
450 raw_input: function.arguments.to_string(),
451 input: function.arguments,
452 is_input_complete: true,
453 });
454 events.push(Ok(event));
455 state.used_tools = true;
456 }
457 }
458 } else {
459 events.push(Ok(LanguageModelCompletionEvent::Text(content)));
460 }
461 }
462 };
463
464 if delta.done {
465 if state.used_tools {
466 state.used_tools = false;
467 events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::ToolUse)));
468 } else {
469 events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::EndTurn)));
470 }
471 }
472
473 Some((events, state))
474 },
475 );
476
477 stream.flat_map(futures::stream::iter)
478}
479
/// Configuration UI for the Ollama provider.
struct ConfigurationView {
    /// Provider state the view reads its connection status from.
    state: gpui::Entity<State>,
    /// Initial authentication task started in `new`; reset to `None` once it
    /// finishes. While it is `Some`, `render` shows a loading label.
    loading_models_task: Option<Task<()>>,
}
484
485impl ConfigurationView {
486 pub fn new(state: gpui::Entity<State>, window: &mut Window, cx: &mut Context<Self>) -> Self {
487 let loading_models_task = Some(cx.spawn_in(window, {
488 let state = state.clone();
489 async move |this, cx| {
490 if let Some(task) = state
491 .update(cx, |state, cx| state.authenticate(cx))
492 .log_err()
493 {
494 task.await.log_err();
495 }
496 this.update(cx, |this, cx| {
497 this.loading_models_task = None;
498 cx.notify();
499 })
500 .log_err();
501 }
502 }));
503
504 Self {
505 state,
506 loading_models_task,
507 }
508 }
509
510 fn retry_connection(&self, cx: &mut App) {
511 self.state
512 .update(cx, |state, cx| state.fetch_models(cx))
513 .detach_and_log_err(cx);
514 }
515}
516
517impl Render for ConfigurationView {
518 fn render(&mut self, _: &mut Window, cx: &mut Context<Self>) -> impl IntoElement {
519 let is_authenticated = self.state.read(cx).is_authenticated();
520
521 let ollama_intro =
522 "Get up & running with Llama 3.3, Mistral, Gemma 2, and other LLMs with Ollama.";
523
524 if self.loading_models_task.is_some() {
525 div().child(Label::new("Loading models...")).into_any()
526 } else {
527 v_flex()
528 .gap_2()
529 .child(
530 v_flex().gap_1().child(Label::new(ollama_intro)).child(
531 List::new()
532 .child(InstructionListItem::text_only("Ollama must be running with at least one model installed to use it in the assistant."))
533 .child(InstructionListItem::text_only(
534 "Once installed, try `ollama run llama3.2`",
535 )),
536 ),
537 )
538 .child(
539 h_flex()
540 .w_full()
541 .justify_between()
542 .gap_2()
543 .child(
544 h_flex()
545 .w_full()
546 .gap_2()
547 .map(|this| {
548 if is_authenticated {
549 this.child(
550 Button::new("ollama-site", "Ollama")
551 .style(ButtonStyle::Subtle)
552 .icon(IconName::ArrowUpRight)
553 .icon_size(IconSize::XSmall)
554 .icon_color(Color::Muted)
555 .on_click(move |_, _, cx| cx.open_url(OLLAMA_SITE))
556 .into_any_element(),
557 )
558 } else {
559 this.child(
560 Button::new(
561 "download_ollama_button",
562 "Download Ollama",
563 )
564 .style(ButtonStyle::Subtle)
565 .icon(IconName::ArrowUpRight)
566 .icon_size(IconSize::XSmall)
567 .icon_color(Color::Muted)
568 .on_click(move |_, _, cx| {
569 cx.open_url(OLLAMA_DOWNLOAD_URL)
570 })
571 .into_any_element(),
572 )
573 }
574 })
575 .child(
576 Button::new("view-models", "All Models")
577 .style(ButtonStyle::Subtle)
578 .icon(IconName::ArrowUpRight)
579 .icon_size(IconSize::XSmall)
580 .icon_color(Color::Muted)
581 .on_click(move |_, _, cx| cx.open_url(OLLAMA_LIBRARY_URL)),
582 ),
583 )
584 .map(|this| {
585 if is_authenticated {
586 this.child(
587 ButtonLike::new("connected")
588 .disabled(true)
589 .cursor_style(gpui::CursorStyle::Arrow)
590 .child(
591 h_flex()
592 .gap_2()
593 .child(Indicator::dot().color(Color::Success))
594 .child(Label::new("Connected"))
595 .into_any_element(),
596 ),
597 )
598 } else {
599 this.child(
600 Button::new("retry_ollama_models", "Connect")
601 .icon_position(IconPosition::Start)
602 .icon_size(IconSize::XSmall)
603 .icon(IconName::Play)
604 .on_click(cx.listener(move |this, _, _, cx| {
605 this.retry_connection(cx)
606 })),
607 )
608 }
609 })
610 )
611 .into_any()
612 }
613 }
614}
615
616fn tool_into_ollama(tool: LanguageModelRequestTool) -> ollama::OllamaTool {
617 ollama::OllamaTool::Function {
618 function: OllamaFunctionTool {
619 name: tool.name,
620 description: Some(tool.description),
621 parameters: Some(tool.input_schema),
622 },
623 }
624}