KCaverly created
crates/ai/src/ai.rs | 1 +
crates/ai/src/models.rs | 49 ++++++++++++++++++++++++++++
crates/ai/src/templates/base.rs | 54 ++++++++++++------------------
crates/ai/src/templates/preamble.rs | 42 +++++++++++++++++------
4 files changed, 102 insertions(+), 44 deletions(-)

crates/ai/src/ai.rs 🔗
@@ -1,3 +1,4 @@
pub mod completion;
pub mod embedding;
+pub mod models;
pub mod templates;
crates/ai/src/models.rs 🔗
@@ -0,0 +1,49 @@
+use anyhow::anyhow;
+use tiktoken_rs::CoreBPE;
+use util::ResultExt;
+
+pub trait LanguageModel {
+ fn name(&self) -> String;
+ fn count_tokens(&self, content: &str) -> anyhow::Result<usize>;
+ fn truncate(&self, content: &str, length: usize) -> anyhow::Result<String>;
+ fn capacity(&self) -> anyhow::Result<usize>;
+}
+
+struct OpenAILanguageModel {
+ name: String,
+ bpe: Option<CoreBPE>,
+}
+
+impl OpenAILanguageModel {
+ pub fn load(model_name: String) -> Self {
+ let bpe = tiktoken_rs::get_bpe_from_model(&model_name).log_err();
+ OpenAILanguageModel {
+ name: model_name,
+ bpe,
+ }
+ }
+}
+
+impl LanguageModel for OpenAILanguageModel {
+ fn name(&self) -> String {
+ self.name.clone()
+ }
+ fn count_tokens(&self, content: &str) -> anyhow::Result<usize> {
+ if let Some(bpe) = &self.bpe {
+ anyhow::Ok(bpe.encode_with_special_tokens(content).len())
+ } else {
+ Err(anyhow!("bpe for open ai model was not retrieved"))
+ }
+ }
+ fn truncate(&self, content: &str, length: usize) -> anyhow::Result<String> {
+ if let Some(bpe) = &self.bpe {
+ let tokens = bpe.encode_with_special_tokens(content);
+ bpe.decode(tokens[..length].to_vec())
+ } else {
+ Err(anyhow!("bpe for open ai model was not retrieved"))
+ }
+ }
+ fn capacity(&self) -> anyhow::Result<usize> {
+ anyhow::Ok(tiktoken_rs::model::get_context_size(&self.name))
+ }
+}
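The new `LanguageModel` trait makes every tokenizer operation fallible, since the BPE for an OpenAI model may fail to load. Note that `truncate` slices `tokens[..length]` directly, so callers are expected to keep `length` at or below the encoded token count. A minimal sketch of a caller, assuming only the trait above (`fit_to_capacity` and its `reserved` parameter are invented for this illustration, not part of the commit):

```rust
use crate::models::LanguageModel;

/// Trim `content` so it fits in the model's context window, holding back
/// `reserved` tokens for the completion. Hypothetical helper for illustration.
fn fit_to_capacity(
    model: &dyn LanguageModel,
    content: &str,
    reserved: usize,
) -> anyhow::Result<String> {
    let budget = model.capacity()?.saturating_sub(reserved);
    if model.count_tokens(content)? <= budget {
        Ok(content.to_string())
    } else {
        // Safe: we only truncate when the token count exceeds `budget`.
        model.truncate(content, budget)
    }
}
```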
crates/ai/src/templates/base.rs 🔗
@@ -1,17 +1,11 @@
-use std::fmt::Write;
-use std::{cmp::Reverse, sync::Arc};
+use std::cmp::Reverse;
+use std::sync::Arc;
use util::ResultExt;
+use crate::models::LanguageModel;
use crate::templates::repository_context::PromptCodeSnippet;
-pub trait LanguageModel {
- fn name(&self) -> String;
- fn count_tokens(&self, content: &str) -> usize;
- fn truncate(&self, content: &str, length: usize) -> String;
- fn capacity(&self) -> usize;
-}
-
pub(crate) enum PromptFileType {
Text,
Code,
@@ -73,7 +67,7 @@ impl PromptChain {
pub fn generate(&self, truncate: bool) -> anyhow::Result<(String, usize)> {
// Argsort based on Prompt Priority
let seperator = "\n";
- let seperator_tokens = self.args.model.count_tokens(seperator);
+ let seperator_tokens = self.args.model.count_tokens(seperator)?;
let mut sorted_indices = (0..self.templates.len()).collect::<Vec<_>>();
sorted_indices.sort_by_key(|&i| Reverse(&self.templates[i].0));
@@ -81,7 +75,7 @@ impl PromptChain {
// If Truncate
let mut tokens_outstanding = if truncate {
- Some(self.args.model.capacity() - self.args.reserved_tokens)
+ Some(self.args.model.capacity()? - self.args.reserved_tokens)
} else {
None
};
@@ -111,7 +105,7 @@ impl PromptChain {
}
let full_prompt = prompts.join(seperator);
- let total_token_count = self.args.model.count_tokens(&full_prompt);
+ let total_token_count = self.args.model.count_tokens(&full_prompt)?;
anyhow::Ok((prompts.join(seperator), total_token_count))
}
}
@@ -131,10 +125,10 @@ pub(crate) mod tests {
) -> anyhow::Result<(String, usize)> {
let mut content = "This is a test prompt template".to_string();
- let mut token_count = args.model.count_tokens(&content);
+ let mut token_count = args.model.count_tokens(&content)?;
if let Some(max_token_length) = max_token_length {
if token_count > max_token_length {
- content = args.model.truncate(&content, max_token_length);
+ content = args.model.truncate(&content, max_token_length)?;
token_count = max_token_length;
}
}
@@ -152,10 +146,10 @@ pub(crate) mod tests {
) -> anyhow::Result<(String, usize)> {
let mut content = "This is a low priority test prompt template".to_string();
- let mut token_count = args.model.count_tokens(&content);
+ let mut token_count = args.model.count_tokens(&content)?;
if let Some(max_token_length) = max_token_length {
if token_count > max_token_length {
- content = args.model.truncate(&content, max_token_length);
+ content = args.model.truncate(&content, max_token_length)?;
token_count = max_token_length;
}
}
@@ -169,26 +163,22 @@ pub(crate) mod tests {
capacity: usize,
}
- impl DummyLanguageModel {
- fn set_capacity(&mut self, capacity: usize) {
- self.capacity = capacity
- }
- }
-
impl LanguageModel for DummyLanguageModel {
fn name(&self) -> String {
"dummy".to_string()
}
- fn count_tokens(&self, content: &str) -> usize {
- content.chars().collect::<Vec<char>>().len()
+ fn count_tokens(&self, content: &str) -> anyhow::Result<usize> {
+ anyhow::Ok(content.chars().collect::<Vec<char>>().len())
}
- fn truncate(&self, content: &str, length: usize) -> String {
- content.chars().collect::<Vec<char>>()[..length]
- .into_iter()
- .collect::<String>()
+ fn truncate(&self, content: &str, length: usize) -> anyhow::Result<String> {
+ anyhow::Ok(
+ content.chars().collect::<Vec<char>>()[..length]
+ .into_iter()
+ .collect::<String>(),
+ )
}
- fn capacity(&self) -> usize {
- self.capacity
+ fn capacity(&self) -> anyhow::Result<usize> {
+ anyhow::Ok(self.capacity)
}
}
@@ -215,7 +205,7 @@ pub(crate) mod tests {
.to_string()
);
- assert_eq!(model.count_tokens(&prompt), token_count);
+ assert_eq!(model.count_tokens(&prompt).unwrap(), token_count);
// Testing with Truncation Off
// Should ignore capacity and return all prompts
@@ -242,7 +232,7 @@ pub(crate) mod tests {
.to_string()
);
- assert_eq!(model.count_tokens(&prompt), token_count);
+ assert_eq!(model.count_tokens(&prompt).unwrap(), token_count);
// Testing with Truncation Off
// Should ignore capacity and return all prompts
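With the trait now fallible, test doubles like `DummyLanguageModel` wrap their results in `anyhow::Ok`. A sketch of an alternative double that counts whitespace-separated words instead of characters (the `WordCountModel` name is invented here, assuming the same trait):

```rust
struct WordCountModel {
    capacity: usize,
}

impl LanguageModel for WordCountModel {
    fn name(&self) -> String {
        "word-count-dummy".to_string()
    }
    fn count_tokens(&self, content: &str) -> anyhow::Result<usize> {
        // One "token" per whitespace-separated word.
        Ok(content.split_whitespace().count())
    }
    fn truncate(&self, content: &str, length: usize) -> anyhow::Result<String> {
        Ok(content
            .split_whitespace()
            .take(length)
            .collect::<Vec<_>>()
            .join(" "))
    }
    fn capacity(&self) -> anyhow::Result<usize> {
        Ok(self.capacity)
    }
}
```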
crates/ai/src/templates/preamble.rs 🔗
@@ -4,31 +4,49 @@ use std::fmt::Write;
struct EngineerPreamble {}
impl PromptTemplate for EngineerPreamble {
- fn generate(&self, args: &PromptArguments, max_token_length: Option<usize>) -> String {
- let mut prompt = String::new();
+ fn generate(
+ &self,
+ args: &PromptArguments,
+ max_token_length: Option<usize>,
+ ) -> anyhow::Result<(String, usize)> {
+ let mut prompts = Vec::new();
match args.get_file_type() {
PromptFileType::Code => {
- writeln!(
- prompt,
+ prompts.push(format!(
"You are an expert {} engineer.",
args.language_name.clone().unwrap_or("".to_string())
- )
- .unwrap();
+ ));
}
PromptFileType::Text => {
- writeln!(prompt, "You are an expert engineer.").unwrap();
+ prompts.push("You are an expert engineer.".to_string());
}
}
if let Some(project_name) = args.project_name.clone() {
- writeln!(
- prompt,
+ prompts.push(format!(
"You are currently working inside the '{project_name}' in Zed the code editor."
- )
- .unwrap();
+ ));
}
- prompt
+ if let Some(mut remaining_tokens) = max_token_length {
+ let mut prompt = String::new();
+ let mut total_count = 0;
+ for prompt_piece in prompts {
+ let prompt_token_count =
+ args.model.count_tokens(&prompt_piece)? + args.model.count_tokens("\n")?;
+ if remaining_tokens > prompt_token_count {
+ writeln!(prompt, "{prompt_piece}").unwrap();
+ remaining_tokens -= prompt_token_count;
+ total_count += prompt_token_count;
+ }
+ }
+
+ anyhow::Ok((prompt, total_count))
+ } else {
+ let prompt = prompts.join("\n");
+ let token_count = args.model.count_tokens(&prompt)?;
+ anyhow::Ok((prompt, token_count))
+ }
}
}
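When a token budget is given, `EngineerPreamble::generate` greedily keeps each prompt piece only if it, plus a newline separator, still fits in the remaining budget. The same pattern in isolation (a self-contained sketch; the word-per-token counter stands in for `args.model.count_tokens`):

```rust
/// Greedily assemble `pieces` without exceeding `budget` tokens,
/// charging one extra token per piece for the "\n" separator,
/// mirroring the loop in `EngineerPreamble::generate`.
fn assemble_within_budget(pieces: &[&str], mut budget: usize) -> String {
    // Stand-in tokenizer: one token per whitespace-separated word.
    let count_tokens = |s: &str| s.split_whitespace().count();
    let mut kept = Vec::new();
    for piece in pieces {
        let cost = count_tokens(piece) + 1; // +1 for the separator
        if budget > cost {
            kept.push(*piece);
            budget -= cost;
        }
    }
    kept.join("\n")
}
```

For example, `assemble_within_budget(&["You are an expert Rust engineer.", "You are currently working inside the 'zed' project."], 8)` keeps only the first piece: it costs 7 of the 8 available tokens, leaving too few for the second.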