Use fallback BPE if the language model doesn't have one (#6848)
Todsaporn Banjerdkit, Piotr Osiewicz, and Marshall Bowers created 2 years ago
Release Notes:
- Added a fallback BPE if the language model doesn't have one.
---------
Co-authored-by: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com>
Co-authored-by: Marshall Bowers <elliott.codes@gmail.com>
Change summary
crates/ai/src/providers/open_ai/embedding.rs | 2 +-
crates/ai/src/providers/open_ai/model.rs | 8 +++++---
2 files changed, 6 insertions(+), 4 deletions(-)
Detailed changes
@@ -30,7 +30,7 @@ use crate::providers::open_ai::OpenAiLanguageModel;
use crate::providers::open_ai::OPEN_AI_API_URL;
lazy_static! {
- static ref OPEN_AI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap();
+ pub(crate) static ref OPEN_AI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap();
}
#[derive(Clone)]
@@ -1,9 +1,10 @@
use anyhow::anyhow;
use tiktoken_rs::CoreBPE;
-use util::ResultExt;
use crate::models::{LanguageModel, TruncationDirection};
+use super::OPEN_AI_BPE_TOKENIZER;
+
#[derive(Clone)]
pub struct OpenAiLanguageModel {
name: String,
@@ -12,10 +13,11 @@ pub struct OpenAiLanguageModel {
impl OpenAiLanguageModel {
pub fn load(model_name: &str) -> Self {
- let bpe = tiktoken_rs::get_bpe_from_model(model_name).log_err();
+ let bpe =
+ tiktoken_rs::get_bpe_from_model(model_name).unwrap_or(OPEN_AI_BPE_TOKENIZER.to_owned());
OpenAiLanguageModel {
name: model_name.to_string(),
- bpe,
+ bpe: Some(bpe),
}
}
}