ollama.rs

use anyhow::{Result, anyhow};
use futures::{FutureExt, StreamExt, future::BoxFuture, stream::BoxStream};
use futures::{Stream, TryFutureExt, stream};
use gpui::{AnyView, App, AsyncApp, Context, Subscription, Task};
use http_client::HttpClient;
use language_model::{
    AuthenticateError, LanguageModelCompletionError, LanguageModelCompletionEvent,
    LanguageModelRequestTool, LanguageModelToolUse, LanguageModelToolUseId, StopReason,
};
use language_model::{
    LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
    LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState,
    LanguageModelRequest, RateLimiter, Role,
};
use ollama::{
    ChatMessage, ChatOptions, ChatRequest, ChatResponseDelta, KeepAlive, OllamaFunctionTool,
    OllamaToolCall, get_models, preload_model, show_model, stream_chat_completion,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::pin::Pin;
use std::sync::atomic::{AtomicU64, Ordering};
use std::{collections::BTreeMap, sync::Arc};
use ui::{ButtonLike, Indicator, List, prelude::*};
use util::ResultExt;

use crate::AllLanguageModelSettings;
use crate::ui::InstructionListItem;

const OLLAMA_DOWNLOAD_URL: &str = "https://ollama.com/download";
const OLLAMA_LIBRARY_URL: &str = "https://ollama.com/library";
const OLLAMA_SITE: &str = "https://ollama.com/";

const PROVIDER_ID: &str = "ollama";
const PROVIDER_NAME: &str = "Ollama";

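/// User-facing settings for the Ollama provider: the server URL and any
/// manually configured models.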
#[derive(Default, Debug, Clone, PartialEq)]
pub struct OllamaSettings {
    pub api_url: String,
    pub available_models: Vec<AvailableModel>,
}

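/// A model made available through settings; entries here supplement or
/// override models discovered from the Ollama API in `provided_models`.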
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
    /// The model name in the Ollama API (e.g. "llama3.2:latest")
    pub name: String,
    /// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel.
    pub display_name: Option<String>,
    /// The Context Length parameter to the model (aka num_ctx or n_ctx)
    pub max_tokens: usize,
    /// The number of seconds to keep the connection open after the last request
    pub keep_alive: Option<KeepAlive>,
    /// Whether the model supports tools
    pub supports_tools: Option<bool>,
}

pub struct OllamaLanguageModelProvider {
    http_client: Arc<dyn HttpClient>,
    state: gpui::Entity<State>,
}

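/// Shared provider state: the list of models fetched from the Ollama server
/// and a settings subscription that triggers a re-fetch when settings change.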
pub struct State {
    http_client: Arc<dyn HttpClient>,
    available_models: Vec<ollama::Model>,
    fetch_model_task: Option<Task<Result<()>>>,
    _subscription: Subscription,
}

impl State {
    fn is_authenticated(&self) -> bool {
        !self.available_models.is_empty()
    }

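    /// Fetches the list of installed models from the Ollama server, then
    /// queries each model's capabilities (e.g. tool support) via `show_model`.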
    fn fetch_models(&mut self, cx: &mut Context<Self>) -> Task<Result<()>> {
        let settings = &AllLanguageModelSettings::get_global(cx).ollama;
        let http_client = Arc::clone(&self.http_client);
        let api_url = settings.api_url.clone();

        // As a proxy for the server being "authenticated", we'll check if it's up by fetching the models
        cx.spawn(async move |this, cx| {
            let models = get_models(http_client.as_ref(), &api_url, None).await?;

            let tasks = models
                .into_iter()
                // Since there is no metadata from the Ollama API
                // indicating which models are embedding models,
                // simply filter out models with "-embed" in their name
                .filter(|model| !model.name.contains("-embed"))
                .map(|model| {
                    let http_client = Arc::clone(&http_client);
                    let api_url = api_url.clone();
                    async move {
                        let name = model.name.as_str();
                        let capabilities = show_model(http_client.as_ref(), &api_url, name).await?;
                        let ollama_model = ollama::Model::new(
                            name,
                            None,
                            None,
                            Some(capabilities.supports_tools()),
                        );
                        Ok(ollama_model)
                    }
                });

            // Rate-limit capability fetches
            // since there is an arbitrary number of models available
            let mut ollama_models: Vec<_> = futures::stream::iter(tasks)
                .buffer_unordered(5)
                .collect::<Vec<Result<_>>>()
                .await
                .into_iter()
                .collect::<Result<Vec<_>>>()?;

            ollama_models.sort_by(|a, b| a.name.cmp(&b.name));

            this.update(cx, |this, cx| {
                this.available_models = ollama_models;
                cx.notify();
            })
        })
    }

    fn restart_fetch_models_task(&mut self, cx: &mut Context<Self>) {
        let task = self.fetch_models(cx);
        self.fetch_model_task.replace(task);
    }

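    /// "Authenticates" by fetching the model list; a reachable Ollama server
    /// with at least one installed model counts as authenticated.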
    fn authenticate(&mut self, cx: &mut Context<Self>) -> Task<Result<(), AuthenticateError>> {
        if self.is_authenticated() {
            return Task::ready(Ok(()));
        }

        let fetch_models_task = self.fetch_models(cx);
        cx.spawn(async move |_this, _cx| Ok(fetch_models_task.await?))
    }
}

impl OllamaLanguageModelProvider {
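    /// Builds the provider, subscribing to settings changes so the model list
    /// is re-fetched whenever the Ollama settings change, and kicks off an
    /// initial fetch.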
    pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut App) -> Self {
        let this = Self {
            http_client: http_client.clone(),
            state: cx.new(|cx| {
                let subscription = cx.observe_global::<SettingsStore>({
                    let mut settings = AllLanguageModelSettings::get_global(cx).ollama.clone();
                    move |this: &mut State, cx| {
                        let new_settings = &AllLanguageModelSettings::get_global(cx).ollama;
                        if &settings != new_settings {
                            settings = new_settings.clone();
                            this.restart_fetch_models_task(cx);
                            cx.notify();
                        }
                    }
                });

                State {
                    http_client,
                    available_models: Default::default(),
                    fetch_model_task: None,
                    _subscription: subscription,
                }
            }),
        };
        this.state
            .update(cx, |state, cx| state.restart_fetch_models_task(cx));
        this
    }
}

impl LanguageModelProviderState for OllamaLanguageModelProvider {
    type ObservableEntity = State;

    fn observable_entity(&self) -> Option<gpui::Entity<Self::ObservableEntity>> {
        Some(self.state.clone())
    }
}

impl LanguageModelProvider for OllamaLanguageModelProvider {
    fn id(&self) -> LanguageModelProviderId {
        LanguageModelProviderId(PROVIDER_ID.into())
    }

    fn name(&self) -> LanguageModelProviderName {
        LanguageModelProviderName(PROVIDER_NAME.into())
    }

    fn icon(&self) -> IconName {
        IconName::AiOllama
    }

    fn default_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
        self.provided_models(cx).into_iter().next()
    }

    fn default_fast_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
        self.default_model(cx)
    }

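    /// Returns models discovered from the Ollama API merged with models from
    /// settings; settings entries with the same name take precedence.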
    fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
        let mut models: BTreeMap<String, ollama::Model> = BTreeMap::default();

        // Add models from the Ollama API
        for model in self.state.read(cx).available_models.iter() {
            models.insert(model.name.clone(), model.clone());
        }

        // Override with available models from settings
        for model in AllLanguageModelSettings::get_global(cx)
            .ollama
            .available_models
            .iter()
        {
            models.insert(
                model.name.clone(),
                ollama::Model {
                    name: model.name.clone(),
                    display_name: model.display_name.clone(),
                    max_tokens: model.max_tokens,
                    keep_alive: model.keep_alive.clone(),
                    supports_tools: model.supports_tools,
                },
            );
        }

        models
            .into_values()
            .map(|model| {
                Arc::new(OllamaLanguageModel {
                    id: LanguageModelId::from(model.name.clone()),
                    model: model.clone(),
                    http_client: self.http_client.clone(),
                    request_limiter: RateLimiter::new(4),
                }) as Arc<dyn LanguageModel>
            })
            .collect()
    }

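    /// Asks the Ollama server to preload the model ahead of its first use.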
    fn load_model(&self, model: Arc<dyn LanguageModel>, cx: &App) {
        let settings = &AllLanguageModelSettings::get_global(cx).ollama;
        let http_client = self.http_client.clone();
        let api_url = settings.api_url.clone();
        let id = model.id().0.to_string();
        cx.spawn(async move |_| preload_model(http_client, &api_url, &id).await)
            .detach_and_log_err(cx);
    }

    fn is_authenticated(&self, cx: &App) -> bool {
        self.state.read(cx).is_authenticated()
    }

    fn authenticate(&self, cx: &mut App) -> Task<Result<(), AuthenticateError>> {
        self.state.update(cx, |state, cx| state.authenticate(cx))
    }

    fn configuration_view(&self, window: &mut Window, cx: &mut App) -> AnyView {
        let state = self.state.clone();
        cx.new(|cx| ConfigurationView::new(state, window, cx))
            .into()
    }

    fn reset_credentials(&self, cx: &mut App) -> Task<Result<()>> {
        self.state.update(cx, |state, cx| state.fetch_models(cx))
    }
}

pub struct OllamaLanguageModel {
    id: LanguageModelId,
    model: ollama::Model,
    http_client: Arc<dyn HttpClient>,
    request_limiter: RateLimiter,
}

impl OllamaLanguageModel {
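    /// Converts a `LanguageModelRequest` into an Ollama `ChatRequest`,
    /// flattening each message to plain text and carrying over stop sequences,
    /// temperature, context length, and tool definitions.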
    fn to_ollama_request(&self, request: LanguageModelRequest) -> ChatRequest {
        ChatRequest {
            model: self.model.name.clone(),
            messages: request
                .messages
                .into_iter()
                .map(|msg| match msg.role {
                    Role::User => ChatMessage::User {
                        content: msg.string_contents(),
                    },
                    Role::Assistant => ChatMessage::Assistant {
                        content: msg.string_contents(),
                        tool_calls: None,
                    },
                    Role::System => ChatMessage::System {
                        content: msg.string_contents(),
                    },
                })
                .collect(),
            keep_alive: self.model.keep_alive.clone().unwrap_or_default(),
            stream: true,
            options: Some(ChatOptions {
                num_ctx: Some(self.model.max_tokens),
                stop: Some(request.stop),
                temperature: request.temperature.or(Some(1.0)),
                ..Default::default()
            }),
            tools: request.tools.into_iter().map(tool_into_ollama).collect(),
        }
    }
}

impl LanguageModel for OllamaLanguageModel {
    fn id(&self) -> LanguageModelId {
        self.id.clone()
    }

    fn name(&self) -> LanguageModelName {
        LanguageModelName::from(self.model.display_name().to_string())
    }

    fn provider_id(&self) -> LanguageModelProviderId {
        LanguageModelProviderId(PROVIDER_ID.into())
    }

    fn provider_name(&self) -> LanguageModelProviderName {
        LanguageModelProviderName(PROVIDER_NAME.into())
    }

    fn supports_tools(&self) -> bool {
        self.model.supports_tools.unwrap_or(false)
    }

    fn telemetry_id(&self) -> String {
        format!("ollama/{}", self.model.id())
    }

    fn max_token_count(&self) -> usize {
        self.model.max_token_count()
    }

    fn count_tokens(
        &self,
        request: LanguageModelRequest,
        _cx: &App,
    ) -> BoxFuture<'static, Result<usize>> {
        // There is no endpoint for this _yet_ in Ollama
        // see: https://github.com/ollama/ollama/issues/1716 and https://github.com/ollama/ollama/issues/3582
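        // As a rough approximation, assume an average of about 4 characters per token.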
        let token_count = request
            .messages
            .iter()
            .map(|msg| msg.string_contents().chars().count())
            .sum::<usize>()
            / 4;

        async move { Ok(token_count) }.boxed()
    }

    fn stream_completion(
        &self,
        request: LanguageModelRequest,
        cx: &AsyncApp,
    ) -> BoxFuture<
        'static,
        Result<
            BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
        >,
    > {
        let request = self.to_ollama_request(request);

        let http_client = self.http_client.clone();
        let Ok(api_url) = cx.update(|cx| {
            let settings = &AllLanguageModelSettings::get_global(cx).ollama;
            settings.api_url.clone()
        }) else {
            return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
        };

        let future = self.request_limiter.stream(async move {
            let stream = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
            let stream = map_to_language_model_completion_events(stream);
            Ok(stream)
        });

        future.map_ok(|f| f.boxed()).boxed()
    }
}

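/// Maps Ollama's streaming chat deltas to `LanguageModelCompletionEvent`s.
///
/// A single delta may need to produce more than one event (for example a tool
/// use followed by a stop), so each step of the unfold below yields a `Vec` of
/// events and the resulting stream is flattened. Tool calls are emitted as
/// single, complete `ToolUse` events rather than being streamed incrementally.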
fn map_to_language_model_completion_events(
    stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
) -> impl Stream<Item = Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
    // Used for creating unique tool use ids
    static TOOL_CALL_COUNTER: AtomicU64 = AtomicU64::new(0);

    struct State {
        stream: Pin<Box<dyn Stream<Item = anyhow::Result<ChatResponseDelta>> + Send>>,
        used_tools: bool,
    }

    // A single response from the upstream stream may have to produce both a
    // ToolUse and a Stop event, so each unfold step returns a Vec of events.
    let stream = stream::unfold(
        State {
            stream,
            used_tools: false,
        },
        async move |mut state| {
            let response = state.stream.next().await?;

            let delta = match response {
                Ok(delta) => delta,
                Err(e) => {
                    let event = Err(LanguageModelCompletionError::Other(anyhow!(e)));
                    return Some((vec![event], state));
                }
            };

            let mut events = Vec::new();

            match delta.message {
                ChatMessage::User { content } => {
                    events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                }
                ChatMessage::System { content } => {
                    events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                }
                ChatMessage::Assistant {
                    content,
                    tool_calls,
                } => {
                    // Check for tool calls
                    if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
                        match tool_call {
                            OllamaToolCall::Function(function) => {
                                let tool_id = format!(
                                    "{}-{}",
                                    &function.name,
                                    TOOL_CALL_COUNTER.fetch_add(1, Ordering::Relaxed)
                                );
                                let event =
                                    LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse {
                                        id: LanguageModelToolUseId::from(tool_id),
                                        name: Arc::from(function.name),
                                        raw_input: function.arguments.to_string(),
                                        input: function.arguments,
                                        is_input_complete: true,
                                    });
                                events.push(Ok(event));
                                state.used_tools = true;
                            }
                        }
                    } else {
                        events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                    }
                }
            };

            if delta.done {
                if state.used_tools {
                    state.used_tools = false;
                    events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::ToolUse)));
                } else {
                    events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::EndTurn)));
                }
            }

            Some((events, state))
        },
    );

    stream.flat_map(futures::stream::iter)
}

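/// Configuration UI for the Ollama provider: shows connection status and
/// offers links to download Ollama and browse the model library.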
struct ConfigurationView {
    state: gpui::Entity<State>,
    loading_models_task: Option<Task<()>>,
}

impl ConfigurationView {
    pub fn new(state: gpui::Entity<State>, window: &mut Window, cx: &mut Context<Self>) -> Self {
        let loading_models_task = Some(cx.spawn_in(window, {
            let state = state.clone();
            async move |this, cx| {
                if let Some(task) = state
                    .update(cx, |state, cx| state.authenticate(cx))
                    .log_err()
                {
                    task.await.log_err();
                }
                this.update(cx, |this, cx| {
                    this.loading_models_task = None;
                    cx.notify();
                })
                .log_err();
            }
        }));

        Self {
            state,
            loading_models_task,
        }
    }

    fn retry_connection(&self, cx: &mut App) {
        self.state
            .update(cx, |state, cx| state.fetch_models(cx))
            .detach_and_log_err(cx);
    }
}

impl Render for ConfigurationView {
    fn render(&mut self, _: &mut Window, cx: &mut Context<Self>) -> impl IntoElement {
        let is_authenticated = self.state.read(cx).is_authenticated();

        let ollama_intro =
            "Get up & running with Llama 3.3, Mistral, Gemma 2, and other LLMs with Ollama.";

        if self.loading_models_task.is_some() {
            div().child(Label::new("Loading models...")).into_any()
        } else {
            v_flex()
                .gap_2()
                .child(
                    v_flex().gap_1().child(Label::new(ollama_intro)).child(
                        List::new()
                            .child(InstructionListItem::text_only("Ollama must be running with at least one model installed to use it in the assistant."))
                            .child(InstructionListItem::text_only(
                                "Once installed, try `ollama run llama3.2`",
                            )),
                    ),
                )
                .child(
                    h_flex()
                        .w_full()
                        .justify_between()
                        .gap_2()
                        .child(
                            h_flex()
                                .w_full()
                                .gap_2()
                                .map(|this| {
                                    if is_authenticated {
                                        this.child(
                                            Button::new("ollama-site", "Ollama")
                                                .style(ButtonStyle::Subtle)
                                                .icon(IconName::ArrowUpRight)
                                                .icon_size(IconSize::XSmall)
                                                .icon_color(Color::Muted)
                                                .on_click(move |_, _, cx| cx.open_url(OLLAMA_SITE))
                                                .into_any_element(),
                                        )
                                    } else {
                                        this.child(
                                            Button::new(
                                                "download_ollama_button",
                                                "Download Ollama",
                                            )
                                            .style(ButtonStyle::Subtle)
                                            .icon(IconName::ArrowUpRight)
                                            .icon_size(IconSize::XSmall)
                                            .icon_color(Color::Muted)
                                            .on_click(move |_, _, cx| {
                                                cx.open_url(OLLAMA_DOWNLOAD_URL)
                                            })
                                            .into_any_element(),
                                        )
                                    }
                                })
                                .child(
                                    Button::new("view-models", "All Models")
                                        .style(ButtonStyle::Subtle)
                                        .icon(IconName::ArrowUpRight)
                                        .icon_size(IconSize::XSmall)
                                        .icon_color(Color::Muted)
                                        .on_click(move |_, _, cx| cx.open_url(OLLAMA_LIBRARY_URL)),
                                ),
                        )
                        .map(|this| {
                            if is_authenticated {
                                this.child(
                                    ButtonLike::new("connected")
                                        .disabled(true)
                                        .cursor_style(gpui::CursorStyle::Arrow)
                                        .child(
                                            h_flex()
                                                .gap_2()
                                                .child(Indicator::dot().color(Color::Success))
                                                .child(Label::new("Connected"))
                                                .into_any_element(),
                                        ),
                                )
                            } else {
                                this.child(
                                    Button::new("retry_ollama_models", "Connect")
                                        .icon_position(IconPosition::Start)
                                        .icon_size(IconSize::XSmall)
                                        .icon(IconName::Play)
                                        .on_click(cx.listener(move |this, _, _, cx| {
                                            this.retry_connection(cx)
                                        })),
                                )
                            }
                        })
                )
                .into_any()
        }
    }
}

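/// Converts a language model tool definition into Ollama's function-tool
/// representation.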
fn tool_into_ollama(tool: LanguageModelRequestTool) -> ollama::OllamaTool {
    ollama::OllamaTool::Function {
        function: OllamaFunctionTool {
            name: tool.name,
            description: Some(tool.description),
            parameters: Some(tool.input_schema),
        },
    }
}