Use fallback BPE if the language model doesn't have one (#6848)

Todsaporn Banjerdkit, Piotr Osiewicz, and Marshall Bowers created

Release Notes:

- Added a fallback BPE tokenizer for language models that don't have one.

---------

Co-authored-by: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com>
Co-authored-by: Marshall Bowers <elliott.codes@gmail.com>

Change summary

crates/ai/src/providers/open_ai/embedding.rs | 2 +-
crates/ai/src/providers/open_ai/model.rs     | 8 +++++---
2 files changed, 6 insertions(+), 4 deletions(-)

Detailed changes

crates/ai/src/providers/open_ai/embedding.rs 🔗

@@ -30,7 +30,7 @@ use crate::providers::open_ai::OpenAiLanguageModel;
 use crate::providers::open_ai::OPEN_AI_API_URL;
 
 lazy_static! {
-    static ref OPEN_AI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap();
+    pub(crate) static ref OPEN_AI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap();
 }
 
 #[derive(Clone)]

crates/ai/src/providers/open_ai/model.rs 🔗

@@ -1,9 +1,10 @@
 use anyhow::anyhow;
 use tiktoken_rs::CoreBPE;
-use util::ResultExt;
 
 use crate::models::{LanguageModel, TruncationDirection};
 
+use super::OPEN_AI_BPE_TOKENIZER;
+
 #[derive(Clone)]
 pub struct OpenAiLanguageModel {
     name: String,
@@ -12,10 +13,11 @@ pub struct OpenAiLanguageModel {
 
 impl OpenAiLanguageModel {
     pub fn load(model_name: &str) -> Self {
-        let bpe = tiktoken_rs::get_bpe_from_model(model_name).log_err();
+        let bpe =
+            tiktoken_rs::get_bpe_from_model(model_name).unwrap_or(OPEN_AI_BPE_TOKENIZER.to_owned());
         OpenAiLanguageModel {
             name: model_name.to_string(),
-            bpe,
+            bpe: Some(bpe),
         }
     }
 }