ollama.rs

use anyhow::{Result, anyhow};
use futures::{FutureExt, StreamExt, future::BoxFuture, stream::BoxStream};
use futures::{Stream, TryFutureExt, stream};
use gpui::{AnyView, App, AsyncApp, Context, Subscription, Task};
use http_client::HttpClient;
use language_model::{
    AuthenticateError, LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
    LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
    LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
    LanguageModelRequestTool, LanguageModelToolChoice, LanguageModelToolUse,
    LanguageModelToolUseId, MessageContent, RateLimiter, Role, StopReason,
};
use ollama::{
    ChatMessage, ChatOptions, ChatRequest, ChatResponseDelta, KeepAlive, OllamaFunctionTool,
    OllamaToolCall, get_models, preload_model, show_model, stream_chat_completion,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::pin::Pin;
use std::sync::atomic::{AtomicU64, Ordering};
use std::{collections::BTreeMap, sync::Arc};
use ui::{ButtonLike, Indicator, List, prelude::*};
use util::ResultExt;

use crate::AllLanguageModelSettings;
use crate::ui::InstructionListItem;

const OLLAMA_DOWNLOAD_URL: &str = "https://ollama.com/download";
const OLLAMA_LIBRARY_URL: &str = "https://ollama.com/library";
const OLLAMA_SITE: &str = "https://ollama.com/";

const PROVIDER_ID: &str = "ollama";
const PROVIDER_NAME: &str = "Ollama";

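/// Settings for the Ollama provider: the server's API URL and any models
/// declared explicitly in settings.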
#[derive(Default, Debug, Clone, PartialEq)]
pub struct OllamaSettings {
    pub api_url: String,
    pub available_models: Vec<AvailableModel>,
}

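/// A model declared in settings; entries here override models of the same
/// name discovered from the Ollama API.
///
/// An illustrative settings entry (field names follow this struct via serde;
/// where exactly it lives in settings.json is not shown here):
///
/// ```json
/// {
///   "name": "llama3.2:latest",
///   "display_name": "Llama 3.2",
///   "max_tokens": 8192,
///   "supports_tools": true
/// }
/// ```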
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
    /// The model name in the Ollama API (e.g. "llama3.2:latest")
    pub name: String,
    /// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel.
    pub display_name: Option<String>,
    /// The model's context length, passed to Ollama as the num_ctx (aka n_ctx) parameter
    pub max_tokens: usize,
    /// How long the model stays loaded in memory after the last request
    pub keep_alive: Option<KeepAlive>,
    /// Whether the model supports tools
    pub supports_tools: Option<bool>,
    /// Whether the model supports vision
    pub supports_images: Option<bool>,
    /// Whether to enable the model's thinking mode
    pub supports_thinking: Option<bool>,
}

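/// Registers Ollama as a language model provider, sharing one HTTP client and
/// one [`State`] entity across all of its models.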
pub struct OllamaLanguageModelProvider {
    http_client: Arc<dyn HttpClient>,
    state: gpui::Entity<State>,
}

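/// Provider state: the models discovered from the local Ollama server, plus a
/// settings subscription that triggers a refetch whenever the Ollama settings
/// change.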
pub struct State {
    http_client: Arc<dyn HttpClient>,
    available_models: Vec<ollama::Model>,
    fetch_model_task: Option<Task<Result<()>>>,
    _subscription: Subscription,
}

impl State {
    fn is_authenticated(&self) -> bool {
        !self.available_models.is_empty()
    }

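    /// Fetches the model list from the Ollama server, then queries each
    /// model's capabilities (tools, vision, thinking) via `show_model`, a
    /// few at a time.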
    fn fetch_models(&mut self, cx: &mut Context<Self>) -> Task<Result<()>> {
        let settings = &AllLanguageModelSettings::get_global(cx).ollama;
        let http_client = Arc::clone(&self.http_client);
        let api_url = settings.api_url.clone();

        // As a proxy for the server being "authenticated", we'll check if it's up by fetching the models.
        cx.spawn(async move |this, cx| {
            let models = get_models(http_client.as_ref(), &api_url, None).await?;

            let tasks = models
                .into_iter()
                // Since there is no metadata from the Ollama API
                // indicating which models are embedding models,
                // simply filter out models with "-embed" in their name
                .filter(|model| !model.name.contains("-embed"))
                .map(|model| {
                    let http_client = Arc::clone(&http_client);
                    let api_url = api_url.clone();
                    async move {
                        let name = model.name.as_str();
                        let capabilities = show_model(http_client.as_ref(), &api_url, name).await?;
                        let ollama_model = ollama::Model::new(
                            name,
                            None,
                            None,
                            Some(capabilities.supports_tools()),
                            Some(capabilities.supports_vision()),
                            Some(capabilities.supports_thinking()),
                        );
                        Ok(ollama_model)
                    }
                });

            // Rate-limit capability fetches
            // since there is an arbitrary number of models available
            let mut ollama_models: Vec<_> = futures::stream::iter(tasks)
                .buffer_unordered(5)
                .collect::<Vec<Result<_>>>()
                .await
                .into_iter()
                .collect::<Result<Vec<_>>>()?;

            ollama_models.sort_by(|a, b| a.name.cmp(&b.name));

            this.update(cx, |this, cx| {
                this.available_models = ollama_models;
                cx.notify();
            })
        })
    }

    fn restart_fetch_models_task(&mut self, cx: &mut Context<Self>) {
        let task = self.fetch_models(cx);
        self.fetch_model_task.replace(task);
    }

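    /// Treats a reachable server with a non-empty model list as
    /// "authenticated"; returns immediately if models were already fetched.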
    fn authenticate(&mut self, cx: &mut Context<Self>) -> Task<Result<(), AuthenticateError>> {
        if self.is_authenticated() {
            return Task::ready(Ok(()));
        }

        let fetch_models_task = self.fetch_models(cx);
        cx.spawn(async move |_this, _cx| Ok(fetch_models_task.await?))
    }
}

impl OllamaLanguageModelProvider {
    pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut App) -> Self {
        let this = Self {
            http_client: http_client.clone(),
            state: cx.new(|cx| {
                let subscription = cx.observe_global::<SettingsStore>({
                    let mut settings = AllLanguageModelSettings::get_global(cx).ollama.clone();
                    move |this: &mut State, cx| {
                        let new_settings = &AllLanguageModelSettings::get_global(cx).ollama;
                        if &settings != new_settings {
                            settings = new_settings.clone();
                            this.restart_fetch_models_task(cx);
                            cx.notify();
                        }
                    }
                });

                State {
                    http_client,
                    available_models: Default::default(),
                    fetch_model_task: None,
                    _subscription: subscription,
                }
            }),
        };
        this.state
            .update(cx, |state, cx| state.restart_fetch_models_task(cx));
        this
    }
}

impl LanguageModelProviderState for OllamaLanguageModelProvider {
    type ObservableEntity = State;

    fn observable_entity(&self) -> Option<gpui::Entity<Self::ObservableEntity>> {
        Some(self.state.clone())
    }
}

impl LanguageModelProvider for OllamaLanguageModelProvider {
    fn id(&self) -> LanguageModelProviderId {
        LanguageModelProviderId(PROVIDER_ID.into())
    }

    fn name(&self) -> LanguageModelProviderName {
        LanguageModelProviderName(PROVIDER_NAME.into())
    }

    fn icon(&self) -> IconName {
        IconName::AiOllama
    }

    fn default_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
        self.provided_models(cx).into_iter().next()
    }

    fn default_fast_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
        self.default_model(cx)
    }

    fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
        let mut models: BTreeMap<String, ollama::Model> = BTreeMap::default();

        // Add models from the Ollama API
        for model in self.state.read(cx).available_models.iter() {
            models.insert(model.name.clone(), model.clone());
        }

        // Override with available models from settings
        for model in AllLanguageModelSettings::get_global(cx)
            .ollama
            .available_models
            .iter()
        {
            models.insert(
                model.name.clone(),
                ollama::Model {
                    name: model.name.clone(),
                    display_name: model.display_name.clone(),
                    max_tokens: model.max_tokens,
                    keep_alive: model.keep_alive.clone(),
                    supports_tools: model.supports_tools,
                    supports_vision: model.supports_images,
                    supports_thinking: model.supports_thinking,
                },
            );
        }

        models
            .into_values()
            .map(|model| {
                Arc::new(OllamaLanguageModel {
                    id: LanguageModelId::from(model.name.clone()),
                    model: model.clone(),
                    http_client: self.http_client.clone(),
                    request_limiter: RateLimiter::new(4),
                }) as Arc<dyn LanguageModel>
            })
            .collect()
    }

    fn load_model(&self, model: Arc<dyn LanguageModel>, cx: &App) {
        let settings = &AllLanguageModelSettings::get_global(cx).ollama;
        let http_client = self.http_client.clone();
        let api_url = settings.api_url.clone();
        let id = model.id().0.to_string();
        cx.spawn(async move |_| preload_model(http_client, &api_url, &id).await)
            .detach_and_log_err(cx);
    }

    fn is_authenticated(&self, cx: &App) -> bool {
        self.state.read(cx).is_authenticated()
    }

    fn authenticate(&self, cx: &mut App) -> Task<Result<(), AuthenticateError>> {
        self.state.update(cx, |state, cx| state.authenticate(cx))
    }

    fn configuration_view(&self, window: &mut Window, cx: &mut App) -> AnyView {
        let state = self.state.clone();
        cx.new(|cx| ConfigurationView::new(state, window, cx))
            .into()
    }

    fn reset_credentials(&self, cx: &mut App) -> Task<Result<()>> {
        self.state.update(cx, |state, cx| state.fetch_models(cx))
    }
}

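/// A single Ollama model exposed to Zed, with a per-model rate limiter that
/// caps concurrent requests.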
pub struct OllamaLanguageModel {
    id: LanguageModelId,
    model: ollama::Model,
    http_client: Arc<dyn HttpClient>,
    request_limiter: RateLimiter,
}

impl OllamaLanguageModel {
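    /// Converts Zed's provider-agnostic request into an Ollama `ChatRequest`,
    /// dropping image parts when the model lacks vision support.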
    fn to_ollama_request(&self, request: LanguageModelRequest) -> ChatRequest {
        let supports_vision = self.model.supports_vision.unwrap_or(false);

        ChatRequest {
            model: self.model.name.clone(),
            messages: request
                .messages
                .into_iter()
                .map(|msg| {
                    let images = if supports_vision {
                        msg.content
                            .iter()
                            .filter_map(|content| match content {
                                MessageContent::Image(image) => Some(image.source.to_string()),
                                _ => None,
                            })
                            .collect::<Vec<String>>()
                    } else {
                        vec![]
                    };

                    match msg.role {
                        Role::User => ChatMessage::User {
                            content: msg.string_contents(),
                            images: if images.is_empty() {
                                None
                            } else {
                                Some(images)
                            },
                        },
                        Role::Assistant => {
                            let content = msg.string_contents();
                            let thinking =
                                msg.content.into_iter().find_map(|content| match content {
                                    MessageContent::Thinking { text, .. } if !text.is_empty() => {
                                        Some(text)
                                    }
                                    _ => None,
                                });
                            ChatMessage::Assistant {
                                content,
                                tool_calls: None,
                                images: if images.is_empty() {
                                    None
                                } else {
                                    Some(images)
                                },
                                thinking,
                            }
                        }
                        Role::System => ChatMessage::System {
                            content: msg.string_contents(),
                        },
                    }
                })
                .collect(),
            keep_alive: self.model.keep_alive.clone().unwrap_or_default(),
            stream: true,
            options: Some(ChatOptions {
                num_ctx: Some(self.model.max_tokens),
                stop: Some(request.stop),
                temperature: request.temperature.or(Some(1.0)),
                ..Default::default()
            }),
            think: self.model.supports_thinking,
            tools: request.tools.into_iter().map(tool_into_ollama).collect(),
        }
    }
}

impl LanguageModel for OllamaLanguageModel {
    fn id(&self) -> LanguageModelId {
        self.id.clone()
    }

    fn name(&self) -> LanguageModelName {
        LanguageModelName::from(self.model.display_name().to_string())
    }

    fn provider_id(&self) -> LanguageModelProviderId {
        LanguageModelProviderId(PROVIDER_ID.into())
    }

    fn provider_name(&self) -> LanguageModelProviderName {
        LanguageModelProviderName(PROVIDER_NAME.into())
    }

    fn supports_tools(&self) -> bool {
        self.model.supports_tools.unwrap_or(false)
    }

    fn supports_images(&self) -> bool {
        self.model.supports_vision.unwrap_or(false)
    }

    fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
        match choice {
            LanguageModelToolChoice::Auto => false,
            LanguageModelToolChoice::Any => false,
            LanguageModelToolChoice::None => false,
        }
    }

    fn telemetry_id(&self) -> String {
        format!("ollama/{}", self.model.id())
    }

    fn max_token_count(&self) -> usize {
        self.model.max_token_count()
    }

    fn count_tokens(
        &self,
        request: LanguageModelRequest,
        _cx: &App,
    ) -> BoxFuture<'static, Result<usize>> {
        // There is no endpoint for this _yet_ in Ollama
        // see: https://github.com/ollama/ollama/issues/1716 and https://github.com/ollama/ollama/issues/3582
        let token_count = request
            .messages
            .iter()
            .map(|msg| msg.string_contents().chars().count())
            .sum::<usize>()
            / 4;

        async move { Ok(token_count) }.boxed()
    }

    fn stream_completion(
        &self,
        request: LanguageModelRequest,
        cx: &AsyncApp,
    ) -> BoxFuture<
        'static,
        Result<
            BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
        >,
    > {
        let request = self.to_ollama_request(request);

        let http_client = self.http_client.clone();
        let Ok(api_url) = cx.update(|cx| {
            let settings = &AllLanguageModelSettings::get_global(cx).ollama;
            settings.api_url.clone()
        }) else {
            return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
        };

        let future = self.request_limiter.stream(async move {
            let stream = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
            let stream = map_to_language_model_completion_events(stream);
            Ok(stream)
        });

        future.map_ok(|f| f.boxed()).boxed()
    }
}

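/// Adapts Ollama's streaming deltas into Zed completion events. A single delta
/// can produce several events (e.g. a ToolUse followed by a Stop), so each
/// unfold step yields a `Vec` of events that is flattened at the end.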
fn map_to_language_model_completion_events(
    stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
) -> impl Stream<Item = Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
    // Used for creating unique tool use ids
    static TOOL_CALL_COUNTER: AtomicU64 = AtomicU64::new(0);

    struct State {
        stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
        used_tools: bool,
    }

    // We need to create a ToolUse and Stop event from a single
    // response from the original stream
    let stream = stream::unfold(
        State {
            stream,
            used_tools: false,
        },
        async move |mut state| {
            let response = state.stream.next().await?;

            let delta = match response {
                Ok(delta) => delta,
                Err(e) => {
                    let event = Err(LanguageModelCompletionError::Other(anyhow!(e)));
                    return Some((vec![event], state));
                }
            };

            let mut events = Vec::new();

            match delta.message {
                ChatMessage::User { content, images: _ } => {
                    events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                }
                ChatMessage::System { content } => {
                    events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                }
                ChatMessage::Assistant {
                    content,
                    tool_calls,
                    images: _,
                    thinking,
                } => {
                    if let Some(text) = thinking {
                        events.push(Ok(LanguageModelCompletionEvent::Thinking {
                            text,
                            signature: None,
                        }));
                    }

                    if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
                        match tool_call {
                            OllamaToolCall::Function(function) => {
                                let tool_id = format!(
                                    "{}-{}",
                                    &function.name,
                                    TOOL_CALL_COUNTER.fetch_add(1, Ordering::Relaxed)
                                );
                                let event =
                                    LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse {
                                        id: LanguageModelToolUseId::from(tool_id),
                                        name: Arc::from(function.name),
                                        raw_input: function.arguments.to_string(),
                                        input: function.arguments,
                                        is_input_complete: true,
                                    });
                                events.push(Ok(event));
                                state.used_tools = true;
                            }
                        }
                    } else if !content.is_empty() {
                        events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                    }
                }
            };

            if delta.done {
                if state.used_tools {
                    state.used_tools = false;
                    events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::ToolUse)));
                } else {
                    events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::EndTurn)));
                }
            }

            Some((events, state))
        },
    );

    stream.flat_map(futures::stream::iter)
}

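/// The provider's settings UI: shows connection status and links to the
/// Ollama site, download page, and model library.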
struct ConfigurationView {
    state: gpui::Entity<State>,
    loading_models_task: Option<Task<()>>,
}

impl ConfigurationView {
    pub fn new(state: gpui::Entity<State>, window: &mut Window, cx: &mut Context<Self>) -> Self {
        let loading_models_task = Some(cx.spawn_in(window, {
            let state = state.clone();
            async move |this, cx| {
                if let Some(task) = state
                    .update(cx, |state, cx| state.authenticate(cx))
                    .log_err()
                {
                    task.await.log_err();
                }
                this.update(cx, |this, cx| {
                    this.loading_models_task = None;
                    cx.notify();
                })
                .log_err();
            }
        }));

        Self {
            state,
            loading_models_task,
        }
    }

    fn retry_connection(&self, cx: &mut App) {
        self.state
            .update(cx, |state, cx| state.fetch_models(cx))
            .detach_and_log_err(cx);
    }
}

impl Render for ConfigurationView {
    fn render(&mut self, _: &mut Window, cx: &mut Context<Self>) -> impl IntoElement {
        let is_authenticated = self.state.read(cx).is_authenticated();

        let ollama_intro =
            "Get up & running with Llama 3.3, Mistral, Gemma 2, and other LLMs with Ollama.";

        if self.loading_models_task.is_some() {
            div().child(Label::new("Loading models...")).into_any()
        } else {
            v_flex()
                .gap_2()
                .child(
                    v_flex().gap_1().child(Label::new(ollama_intro)).child(
                        List::new()
                            .child(InstructionListItem::text_only("Ollama must be running with at least one model installed to use it in the assistant."))
                            .child(InstructionListItem::text_only(
                                "Once installed, try `ollama run llama3.2`",
                            )),
                    ),
                )
                .child(
                    h_flex()
                        .w_full()
                        .justify_between()
                        .gap_2()
                        .child(
                            h_flex()
                                .w_full()
                                .gap_2()
                                .map(|this| {
                                    if is_authenticated {
                                        this.child(
                                            Button::new("ollama-site", "Ollama")
                                                .style(ButtonStyle::Subtle)
                                                .icon(IconName::ArrowUpRight)
                                                .icon_size(IconSize::XSmall)
                                                .icon_color(Color::Muted)
                                                .on_click(move |_, _, cx| cx.open_url(OLLAMA_SITE))
                                                .into_any_element(),
                                        )
                                    } else {
                                        this.child(
                                            Button::new(
                                                "download_ollama_button",
                                                "Download Ollama",
                                            )
                                            .style(ButtonStyle::Subtle)
                                            .icon(IconName::ArrowUpRight)
                                            .icon_size(IconSize::XSmall)
                                            .icon_color(Color::Muted)
                                            .on_click(move |_, _, cx| {
                                                cx.open_url(OLLAMA_DOWNLOAD_URL)
                                            })
                                            .into_any_element(),
                                        )
                                    }
                                })
                                .child(
                                    Button::new("view-models", "All Models")
                                        .style(ButtonStyle::Subtle)
                                        .icon(IconName::ArrowUpRight)
                                        .icon_size(IconSize::XSmall)
                                        .icon_color(Color::Muted)
                                        .on_click(move |_, _, cx| cx.open_url(OLLAMA_LIBRARY_URL)),
                                ),
                        )
                        .map(|this| {
                            if is_authenticated {
                                this.child(
                                    ButtonLike::new("connected")
                                        .disabled(true)
                                        .cursor_style(gpui::CursorStyle::Arrow)
                                        .child(
                                            h_flex()
                                                .gap_2()
                                                .child(Indicator::dot().color(Color::Success))
                                                .child(Label::new("Connected"))
                                                .into_any_element(),
                                        ),
                                )
                            } else {
                                this.child(
                                    Button::new("retry_ollama_models", "Connect")
                                        .icon_position(IconPosition::Start)
                                        .icon_size(IconSize::XSmall)
                                        .icon(IconName::Play)
                                        .on_click(cx.listener(move |this, _, _, cx| {
                                            this.retry_connection(cx)
                                        })),
                                )
                            }
                        }),
                )
                .into_any()
        }
    }
}

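/// Maps Zed's tool definition onto Ollama's function-calling schema.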
fn tool_into_ollama(tool: LanguageModelRequestTool) -> ollama::OllamaTool {
    ollama::OllamaTool::Function {
        function: OllamaFunctionTool {
            name: tool.name,
            description: Some(tool.description),
            parameters: Some(tool.input_schema),
        },
    }
}