diff --git a/crates/ai/src/providers/open_ai/embedding.rs b/crates/ai/src/providers/open_ai/embedding.rs
index 7480a454a1b474295cdd3763f5251a2335001449..4a8b051df3fd7fc4ae678d9e641e90eb2395fa62 100644
--- a/crates/ai/src/providers/open_ai/embedding.rs
+++ b/crates/ai/src/providers/open_ai/embedding.rs
@@ -30,7 +30,7 @@ use crate::providers::open_ai::OpenAiLanguageModel;
 use crate::providers::open_ai::OPEN_AI_API_URL;
 
 lazy_static! {
-    static ref OPEN_AI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap();
+    pub(crate) static ref OPEN_AI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap();
 }
 
 #[derive(Clone)]
diff --git a/crates/ai/src/providers/open_ai/model.rs b/crates/ai/src/providers/open_ai/model.rs
index ba3488d7dd8e0832d7c17886ce0abe80c6a1f6a1..21ea0334bdcfcb0c52076bc400390f76176ca84c 100644
--- a/crates/ai/src/providers/open_ai/model.rs
+++ b/crates/ai/src/providers/open_ai/model.rs
@@ -1,9 +1,10 @@
 use anyhow::anyhow;
 use tiktoken_rs::CoreBPE;
-use util::ResultExt;
 
 use crate::models::{LanguageModel, TruncationDirection};
 
+use super::OPEN_AI_BPE_TOKENIZER;
+
 #[derive(Clone)]
 pub struct OpenAiLanguageModel {
     name: String,
@@ -12,10 +13,17 @@ pub struct OpenAiLanguageModel {
 
 impl OpenAiLanguageModel {
+    /// Builds a model handle for `model_name`, resolving its BPE tokenizer.
+    ///
+    /// If `tiktoken_rs` cannot resolve a tokenizer for `model_name`, an owned copy
+    /// of the shared `OPEN_AI_BPE_TOKENIZER` is used instead, so `bpe` is always `Some`.
     pub fn load(model_name: &str) -> Self {
-        let bpe = tiktoken_rs::get_bpe_from_model(model_name).log_err();
+        // `unwrap_or_else` defers building the fallback to the error path; `unwrap_or`
+        // would evaluate `to_owned()` on every call, even when the lookup succeeds.
+        let bpe = tiktoken_rs::get_bpe_from_model(model_name)
+            .unwrap_or_else(|_| OPEN_AI_BPE_TOKENIZER.to_owned());
         OpenAiLanguageModel {
             name: model_name.to_string(),
-            bpe,
+            bpe: Some(bpe),
         }
     }
 }