Detailed changes

The hunks below remove the `count_tokens` method from the `LanguageModel` trait and every provider implementation of it, drop the `tiktoken-rs` workspace dependency, delete the Anthropic and Google token-counting API helpers along with the `x_ai` completion module, and remove the token-count display from the Rules Library.
@@ -661,7 +661,6 @@ dependencies = [
"serde_json",
"strum 0.27.2",
"thiserror 2.0.17",
- "tiktoken-rs",
]
[[package]]
@@ -672,9 +671,9 @@ checksum = "34cd60c5e3152cef0a592f1b296f1cc93715d89d2551d85315828c3a09575ff4"
[[package]]
name = "anyhow"
-version = "1.0.100"
+version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "approx"
@@ -2209,9 +2208,9 @@ dependencies = [
[[package]]
name = "bstr"
-version = "1.12.0"
+version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
+checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
dependencies = [
"memchr",
"regex-automata",
@@ -7533,7 +7532,6 @@ dependencies = [
"serde",
"serde_json",
"strum 0.27.2",
- "tiktoken-rs",
]
[[package]]
@@ -9574,7 +9572,6 @@ dependencies = [
"settings",
"smol",
"strum 0.27.2",
- "tiktoken-rs",
"tokio",
"ui",
"ui_input",
@@ -9602,7 +9599,6 @@ dependencies = [
"serde_json",
"smol",
"thiserror 2.0.17",
- "x_ai",
]
[[package]]
@@ -11766,7 +11762,6 @@ dependencies = [
"serde_json",
"strum 0.27.2",
"thiserror 2.0.17",
- "tiktoken-rs",
]
[[package]]
@@ -14414,9 +14409,9 @@ dependencies = [
[[package]]
name = "regex"
-version = "1.12.2"
+version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
dependencies = [
"aho-corasick",
"memchr",
@@ -17855,20 +17850,6 @@ dependencies = [
"zune-jpeg 0.5.15",
]
-[[package]]
-name = "tiktoken-rs"
-version = "0.9.1"
-source = "git+https://github.com/zed-industries/tiktoken-rs?rev=2570c4387a8505fb8f1d3f3557454b474f1e8271#2570c4387a8505fb8f1d3f3557454b474f1e8271"
-dependencies = [
- "anyhow",
- "base64 0.22.1",
- "bstr",
- "fancy-regex 0.16.2",
- "lazy_static",
- "regex",
- "rustc-hash 1.1.0",
-]
-
[[package]]
name = "time"
version = "0.3.47"
@@ -21920,11 +21901,9 @@ name = "x_ai"
version = "0.1.0"
dependencies = [
"anyhow",
- "language_model_core",
"schemars",
"serde",
"strum 0.27.2",
- "tiktoken-rs",
]
[[package]]
@@ -733,7 +733,6 @@ sysinfo = "0.37.0"
take-until = "0.2.0"
tempfile = "3.20.0"
thiserror = "2.0.12"
-tiktoken-rs = { git = "https://github.com/zed-industries/tiktoken-rs", rev = "2570c4387a8505fb8f1d3f3557454b474f1e8271" }
time = { version = "0.3", features = [
"macros",
"parsing",
@@ -566,7 +566,7 @@ impl PickerDelegate for LanguageModelPickerDelegate {
mod tests {
use super::*;
use futures::{future::BoxFuture, stream::BoxStream};
- use gpui::{AsyncApp, TestAppContext, http_client};
+ use gpui::{AsyncApp, TestAppContext};
use language_model::{
LanguageModelCompletionError, LanguageModelCompletionEvent, LanguageModelId,
LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
@@ -630,14 +630,6 @@ mod tests {
1000
}
- fn count_tokens(
- &self,
- _: LanguageModelRequest,
- _: &App,
- ) -> BoxFuture<'static, http_client::Result<u64>> {
- unimplemented!()
- }
-
fn stream_completion(
&self,
_: LanguageModelRequest,
@@ -28,6 +28,3 @@ serde.workspace = true
serde_json.workspace = true
strum.workspace = true
thiserror.workspace = true
-tiktoken-rs.workspace = true
-
-
@@ -1000,71 +1000,6 @@ pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
.ok()
}
-/// Request body for the token counting API.
-/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
-#[derive(Debug, Serialize)]
-pub struct CountTokensRequest {
- pub model: String,
- pub messages: Vec<Message>,
- #[serde(default, skip_serializing_if = "Option::is_none")]
- pub system: Option<StringOrContents>,
- #[serde(default, skip_serializing_if = "Vec::is_empty")]
- pub tools: Vec<Tool>,
- #[serde(default, skip_serializing_if = "Option::is_none")]
- pub thinking: Option<Thinking>,
- #[serde(default, skip_serializing_if = "Option::is_none")]
- pub tool_choice: Option<ToolChoice>,
-}
-
-/// Response from the token counting API.
-#[derive(Debug, Deserialize)]
-pub struct CountTokensResponse {
- pub input_tokens: u64,
-}
-
-/// Count the number of tokens in a message without creating it.
-pub async fn count_tokens(
- client: &dyn HttpClient,
- api_url: &str,
- api_key: &str,
- request: CountTokensRequest,
-) -> Result<CountTokensResponse, AnthropicError> {
- let uri = format!("{api_url}/v1/messages/count_tokens");
-
- let request_builder = HttpRequest::builder()
- .method(Method::POST)
- .uri(uri)
- .header("Anthropic-Version", "2023-06-01")
- .header("X-Api-Key", api_key.trim())
- .header("Content-Type", "application/json");
-
- let serialized_request =
- serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
- let http_request = request_builder
- .body(AsyncBody::from(serialized_request))
- .map_err(AnthropicError::BuildRequestBody)?;
-
- let mut response = client
- .send(http_request)
- .await
- .map_err(AnthropicError::HttpSend)?;
-
- let rate_limits = RateLimitInfo::from_headers(response.headers());
-
- if response.status().is_success() {
- let mut body = String::new();
- response
- .body_mut()
- .read_to_string(&mut body)
- .await
- .map_err(AnthropicError::ReadResponse)?;
-
- serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
- } else {
- Err(handle_error_response(response, rate_limits).await)
- }
-}
-
// -- Conversions from/to `language_model_core` types --
impl From<language_model_core::Speed> for Speed {
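For reference, the helper deleted above wrapped Anthropic's public token-counting endpoint; the URL, headers, and `input_tokens` response field all appear in the removed code. A minimal standalone sketch of calling that endpoint directly, assuming `reqwest` (blocking, `json` feature) and `serde_json` rather than this repo's `HttpClient`, and a placeholder model id:

// Sketch only: reqwest/serde_json and the model id are assumptions, not part of this repo.
// [dependencies] reqwest = { version = "0.12", features = ["blocking", "json"] }, serde_json = "1"
use serde_json::json;

fn count_anthropic_tokens(api_key: &str) -> reqwest::Result<u64> {
    let body = json!({
        "model": "claude-3-5-sonnet-latest",  // placeholder model id
        "messages": [{ "role": "user", "content": "Hello, Claude" }]
    });
    let response: serde_json::Value = reqwest::blocking::Client::new()
        .post("https://api.anthropic.com/v1/messages/count_tokens")
        .header("x-api-key", api_key)
        .header("anthropic-version", "2023-06-01")
        .json(&body) // also sets Content-Type: application/json
        .send()?
        .error_for_status()?
        .json()?;
    // The endpoint answers with { "input_tokens": <n> }, which the removed CountTokensResponse modeled.
    Ok(response["input_tokens"].as_u64().unwrap_or(0))
}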
@@ -11,9 +11,9 @@ use std::pin::Pin;
use std::str::FromStr;
use crate::{
- AnthropicError, AnthropicModelMode, CacheControl, CacheControlType, ContentDelta,
- CountTokensRequest, Event, ImageSource, Message, RequestContent, ResponseContent,
- StringOrContents, Thinking, Tool, ToolChoice, ToolResultContent, ToolResultPart, Usage,
+ AnthropicError, AnthropicModelMode, CacheControl, CacheControlType, ContentDelta, Event,
+ ImageSource, Message, RequestContent, ResponseContent, StringOrContents, Thinking, Tool,
+ ToolChoice, ToolResultContent, ToolResultPart, Usage,
};
fn to_anthropic_content(content: MessageContent) -> Option<RequestContent> {
@@ -92,152 +92,6 @@ fn to_anthropic_content(content: MessageContent) -> Option<RequestContent> {
}
}
-/// Convert a LanguageModelRequest to an Anthropic CountTokensRequest.
-pub fn into_anthropic_count_tokens_request(
- request: LanguageModelRequest,
- model: String,
- mode: AnthropicModelMode,
-) -> CountTokensRequest {
- let mut new_messages: Vec<Message> = Vec::new();
- let mut system_message = String::new();
-
- for message in request.messages {
- if message.contents_empty() {
- continue;
- }
-
- match message.role {
- Role::User | Role::Assistant => {
- let anthropic_message_content: Vec<RequestContent> = message
- .content
- .into_iter()
- .filter_map(to_anthropic_content)
- .collect();
- let anthropic_role = match message.role {
- Role::User => crate::Role::User,
- Role::Assistant => crate::Role::Assistant,
- Role::System => unreachable!("System role should never occur here"),
- };
- if anthropic_message_content.is_empty() {
- continue;
- }
-
- if let Some(last_message) = new_messages.last_mut()
- && last_message.role == anthropic_role
- {
- last_message.content.extend(anthropic_message_content);
- continue;
- }
-
- new_messages.push(Message {
- role: anthropic_role,
- content: anthropic_message_content,
- });
- }
- Role::System => {
- if !system_message.is_empty() {
- system_message.push_str("\n\n");
- }
- system_message.push_str(&message.string_contents());
- }
- }
- }
-
- CountTokensRequest {
- model,
- messages: new_messages,
- system: if system_message.is_empty() {
- None
- } else {
- Some(StringOrContents::String(system_message))
- },
- thinking: if request.thinking_allowed {
- match mode {
- AnthropicModelMode::Thinking { budget_tokens } => {
- Some(Thinking::Enabled { budget_tokens })
- }
- AnthropicModelMode::AdaptiveThinking => Some(Thinking::Adaptive),
- AnthropicModelMode::Default => None,
- }
- } else {
- None
- },
- tools: request
- .tools
- .into_iter()
- .map(|tool| Tool {
- name: tool.name,
- description: tool.description,
- input_schema: tool.input_schema,
- eager_input_streaming: tool.use_input_streaming,
- })
- .collect(),
- tool_choice: request.tool_choice.map(|choice| match choice {
- LanguageModelToolChoice::Auto => ToolChoice::Auto,
- LanguageModelToolChoice::Any => ToolChoice::Any,
- LanguageModelToolChoice::None => ToolChoice::None,
- }),
- }
-}
-
-/// Estimate tokens using tiktoken. Used as a fallback when the API is unavailable,
-/// or by providers (like Zed Cloud) that don't have direct Anthropic API access.
-pub fn count_anthropic_tokens_with_tiktoken(request: LanguageModelRequest) -> Result<u64> {
- let messages = request.messages;
- let mut tokens_from_images = 0;
- let mut string_messages = Vec::with_capacity(messages.len());
-
- for message in messages {
- let mut string_contents = String::new();
-
- for content in message.content {
- match content {
- MessageContent::Text(text) => {
- string_contents.push_str(&text);
- }
- MessageContent::Thinking { .. } => {
- // Thinking blocks are not included in the input token count.
- }
- MessageContent::RedactedThinking(_) => {
- // Thinking blocks are not included in the input token count.
- }
- MessageContent::Image(image) => {
- tokens_from_images += image.estimate_tokens();
- }
- MessageContent::ToolUse(_tool_use) => {
- // TODO: Estimate token usage from tool uses.
- }
- MessageContent::ToolResult(tool_result) => match &tool_result.content {
- LanguageModelToolResultContent::Text(text) => {
- string_contents.push_str(text);
- }
- LanguageModelToolResultContent::Image(image) => {
- tokens_from_images += image.estimate_tokens();
- }
- },
- }
- }
-
- if !string_contents.is_empty() {
- string_messages.push(tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(string_contents),
- name: None,
- function_call: None,
- });
- }
- }
-
- // Tiktoken doesn't yet support these models, so we manually use the
- // same tokenizer as GPT-4.
- tiktoken_rs::num_tokens_from_messages("gpt-4", &string_messages)
- .map(|tokens| (tokens + tokens_from_images) as u64)
-}
-
pub fn into_anthropic(
request: LanguageModelRequest,
model: String,
@@ -268,18 +268,6 @@ pub struct WebSearchResult {
pub text: String,
}
-#[derive(Serialize, Deserialize)]
-pub struct CountTokensBody {
- pub provider: LanguageModelProvider,
- pub model: String,
- pub provider_request: serde_json::Value,
-}
-
-#[derive(Serialize, Deserialize)]
-pub struct CountTokensResponse {
- pub tokens: usize,
-}
-
#[derive(Debug, PartialEq, Eq, Hash, Clone, Serialize, Deserialize)]
pub struct LanguageModelId(pub Arc<str>);
@@ -24,4 +24,3 @@ schemars = { workspace = true, optional = true }
serde.workspace = true
serde_json.workspace = true
strum.workspace = true
-tiktoken-rs.workspace = true
@@ -313,29 +313,6 @@ impl GoogleEventMapper {
}
}
-/// Count tokens for a Google AI model using tiktoken. This is synchronous;
-/// callers should spawn it on a background thread if needed.
-pub fn count_google_tokens(request: LanguageModelRequest) -> Result<u64> {
- let messages = request
- .messages
- .into_iter()
- .map(|message| tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(message.string_contents()),
- name: None,
- function_call: None,
- })
- .collect::<Vec<_>>();
-
- // Tiktoken doesn't yet support these models, so we manually use the
- // same tokenizer as GPT-4.
- tiktoken_rs::num_tokens_from_messages("gpt-4", &messages).map(|tokens| tokens as u64)
-}
-
fn update_usage(usage: &mut UsageMetadata, new: &UsageMetadata) {
if let Some(prompt_token_count) = new.prompt_token_count {
usage.prompt_token_count = Some(prompt_token_count);
@@ -64,38 +64,6 @@ pub async fn stream_generate_content(
}
}
-pub async fn count_tokens(
- client: &dyn HttpClient,
- api_url: &str,
- api_key: &str,
- request: CountTokensRequest,
-) -> Result<CountTokensResponse> {
- validate_generate_content_request(&request.generate_content_request)?;
-
- let uri = format!(
- "{api_url}/v1beta/models/{model_id}:countTokens?key={api_key}",
- model_id = &request.generate_content_request.model.model_id,
- );
-
- let request = serde_json::to_string(&request)?;
- let request_builder = HttpRequest::builder()
- .method(Method::POST)
- .uri(&uri)
- .header("Content-Type", "application/json");
- let http_request = request_builder.body(AsyncBody::from(request))?;
-
- let mut response = client.send(http_request).await?;
- let mut text = String::new();
- response.body_mut().read_to_string(&mut text).await?;
- anyhow::ensure!(
- response.status().is_success(),
- "error during countTokens, status code: {:?}, body: {}",
- response.status(),
- text
- );
- Ok(serde_json::from_str::<CountTokensResponse>(&text)?)
-}
-
pub fn validate_generate_content_request(request: &GenerateContentRequest) -> Result<()> {
if request.model.is_empty() {
bail!("Model must be specified");
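Similarly, the Google helper removed above called the public `countTokens` endpoint; the URL shape (`{api_url}/v1beta/models/{model_id}:countTokens?key={api_key}`) and camelCase `totalTokens` response survive in the removed code. A sketch of hitting it directly, under the same reqwest/serde_json assumption, with a placeholder model id and the simpler documented `contents` body rather than the `generateContentRequest` wrapper the helper serialized:

// Sketch only: reqwest/serde_json, the default API URL, and the model id are assumptions.
use serde_json::json;

fn count_google_tokens(api_key: &str) -> reqwest::Result<u64> {
    // Public default API URL and a placeholder model id; the removed helper read both from config.
    let url = format!(
        "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:countTokens?key={api_key}"
    );
    let body = json!({
        "contents": [{ "role": "user", "parts": [{ "text": "Hello" }] }]
    });
    let response: serde_json::Value = reqwest::blocking::Client::new()
        .post(&url)
        .json(&body)
        .send()?
        .error_for_status()?
        .json()?;
    // countTokens responds with { "totalTokens": <n> }, matching the removed camelCase CountTokensResponse.
    Ok(response["totalTokens"].as_u64().unwrap_or(0))
}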
@@ -123,8 +91,6 @@ pub enum Task {
GenerateContent,
#[serde(rename = "streamGenerateContent")]
StreamGenerateContent,
- #[serde(rename = "countTokens")]
- CountTokens,
#[serde(rename = "embedContent")]
EmbedContent,
#[serde(rename = "batchEmbedContents")]
@@ -382,18 +348,6 @@ pub struct SafetyRating {
pub probability: HarmProbability,
}
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(rename_all = "camelCase")]
-pub struct CountTokensRequest {
- pub generate_content_request: GenerateContentRequest,
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(rename_all = "camelCase")]
-pub struct CountTokensResponse {
- pub total_tokens: u64,
-}
-
#[derive(Debug, Serialize, Deserialize)]
pub struct FunctionCall {
pub name: String,
@@ -299,10 +299,6 @@ impl LanguageModel for FakeLanguageModel {
1000000
}
- fn count_tokens(&self, _: LanguageModelRequest, _: &App) -> BoxFuture<'static, Result<u64>> {
- futures::future::ready(Ok(0)).boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -121,12 +121,6 @@ pub trait LanguageModel: Send + Sync {
None
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>>;
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -57,7 +57,6 @@ serde_json.workspace = true
settings.workspace = true
smol.workspace = true
strum.workspace = true
-tiktoken-rs.workspace = true
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
ui.workspace = true
ui_input.workspace = true
@@ -22,10 +22,7 @@ use ui::{ButtonLink, ConfiguredApiCard, List, ListBulletItem, prelude::*};
use ui_input::InputField;
use util::ResultExt;
-pub use anthropic::completion::{
- AnthropicEventMapper, count_anthropic_tokens_with_tiktoken, into_anthropic,
- into_anthropic_count_tokens_request,
-};
+pub use anthropic::completion::{AnthropicEventMapper, into_anthropic};
pub use settings::AnthropicAvailableModel as AvailableModel;
const PROVIDER_ID: LanguageModelProviderId = ANTHROPIC_PROVIDER_ID;
@@ -378,52 +375,6 @@ impl LanguageModel for AnthropicModel {
Some(self.model.max_output_tokens())
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- let http_client = self.http_client.clone();
- let model_id = self.model.request_id().to_string();
- let mode = self.model.mode();
-
- let (api_key, api_url) = self.state.read_with(cx, |state, cx| {
- let api_url = AnthropicLanguageModelProvider::api_url(cx);
- (
- state.api_key_state.key(&api_url).map(|k| k.to_string()),
- api_url.to_string(),
- )
- });
-
- let background = cx.background_executor().clone();
- async move {
- // If no API key, fall back to tiktoken estimation
- let Some(api_key) = api_key else {
- return background
- .spawn(async move { count_anthropic_tokens_with_tiktoken(request) })
- .await;
- };
-
- let count_request =
- into_anthropic_count_tokens_request(request.clone(), model_id, mode);
-
- match anthropic::count_tokens(http_client.as_ref(), &api_url, &api_key, count_request)
- .await
- {
- Ok(response) => Ok(response.input_tokens),
- Err(err) => {
- log::error!(
- "Anthropic count_tokens API failed, falling back to tiktoken: {err:?}"
- );
- background
- .spawn(async move { count_anthropic_tokens_with_tiktoken(request) })
- .await
- }
- }
- }
- .boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -706,14 +706,6 @@ impl LanguageModel for BedrockModel {
Some(self.model.max_output_tokens())
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- get_bedrock_tokens(request, cx)
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -1151,68 +1143,6 @@ pub fn into_bedrock(
})
}
-// TODO: just call the ConverseOutput.usage() method:
-// https://docs.rs/aws-sdk-bedrockruntime/latest/aws_sdk_bedrockruntime/operation/converse/struct.ConverseOutput.html#method.output
-pub fn get_bedrock_tokens(
- request: LanguageModelRequest,
- cx: &App,
-) -> BoxFuture<'static, Result<u64>> {
- cx.background_executor()
- .spawn(async move {
- let messages = request.messages;
- let mut tokens_from_images = 0;
- let mut string_messages = Vec::with_capacity(messages.len());
-
- for message in messages {
- use language_model::MessageContent;
-
- let mut string_contents = String::new();
-
- for content in message.content {
- match content {
- MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
- string_contents.push_str(&text);
- }
- MessageContent::RedactedThinking(_) => {}
- MessageContent::Image(image) => {
- tokens_from_images += image.estimate_tokens();
- }
- MessageContent::ToolUse(_tool_use) => {
- // TODO: Estimate token usage from tool uses.
- }
- MessageContent::ToolResult(tool_result) => match tool_result.content {
- LanguageModelToolResultContent::Text(text) => {
- string_contents.push_str(&text);
- }
- LanguageModelToolResultContent::Image(image) => {
- tokens_from_images += image.estimate_tokens();
- }
- },
- }
- }
-
- if !string_contents.is_empty() {
- string_messages.push(tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(string_contents),
- name: None,
- function_call: None,
- });
- }
- }
-
- // Tiktoken doesn't yet support these models, so we manually use the
- // same tokenizer as GPT-4.
- tiktoken_rs::num_tokens_from_messages("gpt-4", &string_messages)
- .map(|tokens| (tokens + tokens_from_images) as u64)
- })
- .boxed()
-}
-
pub fn map_to_language_model_completion_events(
events: Pin<Box<dyn Send + Stream<Item = Result<BedrockStreamingResponse, anyhow::Error>>>>,
) -> impl Stream<Item = Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
@@ -203,25 +203,6 @@ impl LanguageModelProvider for CopilotChatLanguageModelProvider {
}
}
-fn collect_tiktoken_messages(
- request: LanguageModelRequest,
-) -> Vec<tiktoken_rs::ChatCompletionRequestMessage> {
- request
- .messages
- .into_iter()
- .map(|message| tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(message.string_contents()),
- name: None,
- function_call: None,
- })
- .collect::<Vec<_>>()
-}
-
pub struct CopilotChatLanguageModel {
model: CopilotChatModel,
request_limiter: RateLimiter,
@@ -318,27 +299,6 @@ impl LanguageModel for CopilotChatLanguageModel {
self.model.max_token_count()
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- let model = self.model.clone();
- cx.background_spawn(async move {
- let messages = collect_tiktoken_messages(request);
- // Copilot uses OpenAI tiktoken tokenizer for all it's model irrespective of the underlying provider(vendor).
- let tokenizer_model = match model.tokenizer() {
- Some("o200k_base") => "gpt-4o",
- Some("cl100k_base") => "gpt-4",
- _ => "gpt-4o",
- };
-
- tiktoken_rs::num_tokens_from_messages(tokenizer_model, &messages)
- .map(|tokens| tokens as u64)
- })
- .boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -293,32 +293,6 @@ impl LanguageModel for DeepSeekLanguageModel {
self.model.max_output_tokens()
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- cx.background_spawn(async move {
- let messages = request
- .messages
- .into_iter()
- .map(|message| tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(message.string_contents()),
- name: None,
- function_call: None,
- })
- .collect::<Vec<_>>();
-
- tiktoken_rs::num_tokens_from_messages("gpt-4", &messages).map(|tokens| tokens as u64)
- })
- .boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -2,7 +2,7 @@ use anyhow::{Context as _, Result};
use collections::BTreeMap;
use credentials_provider::CredentialsProvider;
use futures::{FutureExt, StreamExt, future::BoxFuture};
-pub use google_ai::completion::{GoogleEventMapper, count_google_tokens, into_google};
+pub use google_ai::completion::{GoogleEventMapper, into_google};
use google_ai::{GenerateContentResponse, GoogleModelMode};
use gpui::{AnyView, App, AsyncApp, Context, Entity, SharedString, Task, Window};
use http_client::HttpClient;
@@ -327,38 +327,6 @@ impl LanguageModel for GoogleLanguageModel {
self.model.max_output_tokens()
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- let model_id = self.model.request_id().to_string();
- let request = into_google(request, model_id, self.model.mode());
- let http_client = self.http_client.clone();
- let api_url = GoogleLanguageModelProvider::api_url(cx);
- let api_key = self.state.read(cx).api_key_state.key(&api_url);
-
- async move {
- let Some(api_key) = api_key else {
- return Err(LanguageModelCompletionError::NoApiKey {
- provider: PROVIDER_NAME,
- }
- .into());
- };
- let response = google_ai::count_tokens(
- http_client.as_ref(),
- &api_url,
- &api_key,
- google_ai::CountTokensRequest {
- generate_content_request: request,
- },
- )
- .await?;
- Ok(response.total_tokens)
- }
- .boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -505,22 +505,6 @@ impl LanguageModel for LmStudioLanguageModel {
self.model.max_token_count()
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- _cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- // Endpoint for this is coming soon. In the meantime, hacky estimation
- let token_count = request
- .messages
- .iter()
- .map(|msg| msg.string_contents().split_whitespace().count())
- .sum::<usize>();
-
- let estimated_tokens = (token_count as f64 * 0.75) as u64;
- async move { Ok(estimated_tokens) }.boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -327,32 +327,6 @@ impl LanguageModel for MistralLanguageModel {
self.model.max_output_tokens()
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- cx.background_spawn(async move {
- let messages = request
- .messages
- .into_iter()
- .map(|message| tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(message.string_contents()),
- name: None,
- function_call: None,
- })
- .collect::<Vec<_>>();
-
- tiktoken_rs::num_tokens_from_messages("gpt-4", &messages).map(|tokens| tokens as u64)
- })
- .boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -493,23 +493,6 @@ impl LanguageModel for OllamaLanguageModel {
self.model.max_token_count()
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- _cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- // There is no endpoint for this _yet_ in Ollama
- // see: https://github.com/ollama/ollama/issues/1716 and https://github.com/ollama/ollama/issues/3582
- let token_count = request
- .messages
- .iter()
- .map(|msg| msg.string_contents().chars().count())
- .sum::<usize>()
- / 4;
-
- async move { Ok(token_count as u64) }.boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -25,8 +25,7 @@ use ui_input::InputField;
use util::ResultExt;
pub use open_ai::completion::{
- OpenAiEventMapper, OpenAiResponseEventMapper, collect_tiktoken_messages, count_open_ai_tokens,
- into_open_ai, into_open_ai_response,
+ OpenAiEventMapper, OpenAiResponseEventMapper, into_open_ai, into_open_ai_response,
};
const PROVIDER_ID: LanguageModelProviderId = OPEN_AI_PROVIDER_ID;
@@ -369,16 +368,6 @@ impl LanguageModel for OpenAiLanguageModel {
self.model.max_output_tokens()
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- let model = self.model.clone();
- cx.background_spawn(async move { count_open_ai_tokens(request, model) })
- .boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -360,27 +360,6 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
self.model.max_output_tokens
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- let max_token_count = self.max_token_count();
- cx.background_spawn(async move {
- let messages = super::open_ai::collect_tiktoken_messages(request);
- let model = if max_token_count >= 100_000 {
- // If the max tokens is 100k or more, it is likely the o200k_base tokenizer from gpt4o
- "gpt-4o"
- } else {
- // Otherwise fallback to gpt-4, since only cl100k_base and o200k_base are
- // supported with this tiktoken method
- "gpt-4"
- };
- tiktoken_rs::num_tokens_from_messages(model, &messages).map(|tokens| tokens as u64)
- })
- .boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -372,14 +372,6 @@ impl LanguageModel for OpenRouterLanguageModel {
self.model.supports_images.unwrap_or(false)
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- count_open_router_tokens(request, self.model.clone(), cx)
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -741,32 +733,6 @@ struct RawToolCall {
thought_signature: Option<String>,
}
-pub fn count_open_router_tokens(
- request: LanguageModelRequest,
- _model: open_router::Model,
- cx: &App,
-) -> BoxFuture<'static, Result<u64>> {
- cx.background_spawn(async move {
- let messages = request
- .messages
- .into_iter()
- .map(|message| tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(message.string_contents()),
- name: None,
- function_call: None,
- })
- .collect::<Vec<_>>();
-
- tiktoken_rs::num_tokens_from_messages("gpt-4o", &messages).map(|tokens| tokens as u64)
- })
- .boxed()
-}
-
struct ConfigurationView {
api_key_editor: Entity<InputField>,
state: Entity<State>,
@@ -8,7 +8,7 @@ use language_model::{
ApiKeyState, AuthenticateError, EnvVar, IconOrSvg, LanguageModel, LanguageModelCompletionError,
LanguageModelCompletionEvent, LanguageModelId, LanguageModelName, LanguageModelProvider,
LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState,
- LanguageModelRequest, LanguageModelToolChoice, RateLimiter, Role, env_var,
+ LanguageModelRequest, LanguageModelToolChoice, RateLimiter, env_var,
};
use opencode::{ApiProtocol, OPENCODE_API_URL};
pub use settings::OpenCodeAvailableModel as AvailableModel;
@@ -426,32 +426,6 @@ impl LanguageModel for OpenCodeLanguageModel {
self.model.max_output_tokens()
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- cx.background_spawn(async move {
- let messages = request
- .messages
- .into_iter()
- .map(|message| tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(message.string_contents()),
- name: None,
- function_call: None,
- })
- .collect::<Vec<_>>();
-
- tiktoken_rs::num_tokens_from_messages("gpt-4o", &messages).map(|tokens| tokens as u64)
- })
- .boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -8,7 +8,7 @@ use language_model::{
ApiKeyState, AuthenticateError, EnvVar, IconOrSvg, LanguageModel, LanguageModelCompletionError,
LanguageModelCompletionEvent, LanguageModelId, LanguageModelName, LanguageModelProvider,
LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState,
- LanguageModelRequest, LanguageModelToolChoice, RateLimiter, Role, env_var,
+ LanguageModelRequest, LanguageModelToolChoice, RateLimiter, env_var,
};
use open_ai::ResponseStreamEvent;
pub use settings::VercelAvailableModel as AvailableModel;
@@ -18,7 +18,7 @@ use strum::IntoEnumIterator;
use ui::{ButtonLink, ConfiguredApiCard, List, ListBulletItem, prelude::*};
use ui_input::InputField;
use util::ResultExt;
-use vercel::{Model, VERCEL_API_URL};
+use vercel::VERCEL_API_URL;
const PROVIDER_ID: LanguageModelProviderId = LanguageModelProviderId::new("vercel");
const PROVIDER_NAME: LanguageModelProviderName = LanguageModelProviderName::new("Vercel");
@@ -295,14 +295,6 @@ impl LanguageModel for VercelLanguageModel {
self.model.max_output_tokens()
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- count_vercel_tokens(request, self.model.clone(), cx)
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -335,51 +327,6 @@ impl LanguageModel for VercelLanguageModel {
}
}
-pub fn count_vercel_tokens(
- request: LanguageModelRequest,
- model: Model,
- cx: &App,
-) -> BoxFuture<'static, Result<u64>> {
- cx.background_spawn(async move {
- let messages = request
- .messages
- .into_iter()
- .map(|message| tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(message.string_contents()),
- name: None,
- function_call: None,
- })
- .collect::<Vec<_>>();
-
- match model {
- Model::Custom { max_tokens, .. } => {
- let model = if max_tokens >= 100_000 {
- // If the max tokens is 100k or more, it is likely the o200k_base tokenizer from gpt4o
- "gpt-4o"
- } else {
- // Otherwise fallback to gpt-4, since only cl100k_base and o200k_base are
- // supported with this tiktoken method
- "gpt-4"
- };
- tiktoken_rs::num_tokens_from_messages(model, &messages)
- }
- // Map Vercel models to appropriate OpenAI models for token counting
- // since Vercel uses OpenAI-compatible API
- Model::VZeroOnePointFiveMedium => {
- // Vercel v0 is similar to GPT-4o, so use gpt-4o for token counting
- tiktoken_rs::num_tokens_from_messages("gpt-4o", &messages)
- }
- }
- .map(|tokens| tokens as u64)
- })
- .boxed()
-}
-
struct ConfigurationView {
api_key_editor: Entity<InputField>,
state: Entity<State>,
@@ -422,24 +422,6 @@ impl LanguageModel for VercelAiGatewayLanguageModel {
self.model.max_output_tokens
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- let max_token_count = self.max_token_count();
- cx.background_spawn(async move {
- let messages = crate::provider::open_ai::collect_tiktoken_messages(request);
- let model = if max_token_count >= 100_000 {
- "gpt-4o"
- } else {
- "gpt-4"
- };
- tiktoken_rs::num_tokens_from_messages(model, &messages).map(|tokens| tokens as u64)
- })
- .boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -20,7 +20,6 @@ use ui::{ButtonLink, ConfiguredApiCard, List, ListBulletItem, prelude::*};
use ui_input::InputField;
use util::ResultExt;
use x_ai::XAI_API_URL;
-pub use x_ai::completion::count_xai_tokens;
const PROVIDER_ID: LanguageModelProviderId = LanguageModelProviderId::new("x_ai");
const PROVIDER_NAME: LanguageModelProviderName = LanguageModelProviderName::new("xAI");
@@ -316,16 +315,6 @@ impl LanguageModel for XAiLanguageModel {
true
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- let model = self.model.clone();
- cx.background_spawn(async move { count_xai_tokens(request, model) })
- .boxed()
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -27,7 +27,6 @@ serde.workspace = true
serde_json.workspace = true
smol.workspace = true
thiserror.workspace = true
-x_ai = { workspace = true, features = ["schemars"] }
[dev-dependencies]
language_model = { workspace = true, features = ["test-support"] }
@@ -3,9 +3,8 @@ use anyhow::{Context as _, Result, anyhow};
use cloud_llm_client::{
CLIENT_SUPPORTS_STATUS_MESSAGES_HEADER_NAME, CLIENT_SUPPORTS_STATUS_STREAM_ENDED_HEADER_NAME,
CLIENT_SUPPORTS_X_AI_HEADER_NAME, CompletionBody, CompletionEvent, CompletionRequestStatus,
- CountTokensBody, CountTokensResponse, EXPIRED_LLM_TOKEN_HEADER_NAME, ListModelsResponse,
- OUTDATED_LLM_TOKEN_HEADER_NAME, SERVER_SUPPORTS_STATUS_MESSAGES_HEADER_NAME,
- ZED_VERSION_HEADER_NAME,
+ EXPIRED_LLM_TOKEN_HEADER_NAME, ListModelsResponse, OUTDATED_LLM_TOKEN_HEADER_NAME,
+ SERVER_SUPPORTS_STATUS_MESSAGES_HEADER_NAME, ZED_VERSION_HEADER_NAME,
};
use futures::{
AsyncBufReadExt, FutureExt, Stream, StreamExt,
@@ -13,7 +12,7 @@ use futures::{
stream::{self, BoxStream},
};
use google_ai::GoogleModelMode;
-use gpui::{App, AppContext, AsyncApp, Context, Task};
+use gpui::{AppContext, AsyncApp, Context, Task};
use http_client::http::{HeaderMap, HeaderValue};
use http_client::{
AsyncBody, HttpClient, HttpClientWithUrl, HttpRequestExt, Method, Response, StatusCode,
@@ -40,15 +39,11 @@ use std::task::Poll;
use std::time::Duration;
use thiserror::Error;
-use anthropic::completion::{
- AnthropicEventMapper, count_anthropic_tokens_with_tiktoken, into_anthropic,
-};
+use anthropic::completion::{AnthropicEventMapper, into_anthropic};
use google_ai::completion::{GoogleEventMapper, into_google};
use open_ai::completion::{
- OpenAiEventMapper, OpenAiResponseEventMapper, count_open_ai_tokens, into_open_ai,
- into_open_ai_response,
+ OpenAiEventMapper, OpenAiResponseEventMapper, into_open_ai, into_open_ai_response,
};
-use x_ai::completion::count_xai_tokens;
const PROVIDER_ID: LanguageModelProviderId = ZED_CLOUD_PROVIDER_ID;
const PROVIDER_NAME: LanguageModelProviderName = ZED_CLOUD_PROVIDER_NAME;
@@ -374,85 +369,6 @@ impl<TP: CloudLlmTokenProvider + 'static> LanguageModel for CloudLanguageModel<T
}
}
- fn count_tokens(
- &self,
- request: LanguageModelRequest,
- cx: &App,
- ) -> BoxFuture<'static, Result<u64>> {
- match self.model.provider {
- cloud_llm_client::LanguageModelProvider::Anthropic => cx
- .background_spawn(async move { count_anthropic_tokens_with_tiktoken(request) })
- .boxed(),
- cloud_llm_client::LanguageModelProvider::OpenAi => {
- let model = match open_ai::Model::from_id(&self.model.id.0) {
- Ok(model) => model,
- Err(err) => return async move { Err(anyhow!(err)) }.boxed(),
- };
- cx.background_spawn(async move { count_open_ai_tokens(request, model) })
- .boxed()
- }
- cloud_llm_client::LanguageModelProvider::XAi => {
- let model = match x_ai::Model::from_id(&self.model.id.0) {
- Ok(model) => model,
- Err(err) => return async move { Err(anyhow!(err)) }.boxed(),
- };
- cx.background_spawn(async move { count_xai_tokens(request, model) })
- .boxed()
- }
- cloud_llm_client::LanguageModelProvider::Google => {
- let http_client = self.http_client.clone();
- let token_provider = self.token_provider.clone();
- let model_id = self.model.id.to_string();
- let generate_content_request =
- into_google(request, model_id.clone(), GoogleModelMode::Default);
- let auth_context = token_provider.auth_context(cx);
- async move {
- let token = token_provider.acquire_token(auth_context).await?;
-
- let request_body = CountTokensBody {
- provider: cloud_llm_client::LanguageModelProvider::Google,
- model: model_id,
- provider_request: serde_json::to_value(&google_ai::CountTokensRequest {
- generate_content_request,
- })?,
- };
- let request = http_client::Request::builder()
- .method(Method::POST)
- .uri(
- http_client
- .build_zed_llm_url("/count_tokens", &[])?
- .as_ref(),
- )
- .header("Content-Type", "application/json")
- .header("Authorization", format!("Bearer {token}"))
- .body(serde_json::to_string(&request_body)?.into())?;
- let mut response = http_client.send(request).await?;
- let status = response.status();
- let headers = response.headers().clone();
- let mut response_body = String::new();
- response
- .body_mut()
- .read_to_string(&mut response_body)
- .await?;
-
- if status.is_success() {
- let response_body: CountTokensResponse =
- serde_json::from_str(&response_body)?;
-
- Ok(response_body.tokens as u64)
- } else {
- Err(anyhow!(ApiError {
- status,
- body: response_body,
- headers
- }))
- }
- }
- .boxed()
- }
- }
- }
-
fn stream_completion(
&self,
request: LanguageModelRequest,
@@ -28,7 +28,6 @@ serde.workspace = true
serde_json.workspace = true
strum.workspace = true
thiserror.workspace = true
-tiktoken-rs.workspace = true
[dev-dependencies]
pretty_assertions.workspace = true
@@ -18,7 +18,7 @@ use crate::responses::{
StreamEvent as ResponsesStreamEvent,
};
use crate::{
- FunctionContent, FunctionDefinition, ImageUrl, MessagePart, Model, ReasoningEffort,
+ FunctionContent, FunctionDefinition, ImageUrl, MessagePart, ReasoningEffort,
ResponseStreamEvent, ToolCall, ToolCallContent,
};
@@ -818,68 +818,6 @@ fn token_usage_from_response_usage(usage: &ResponsesUsage) -> TokenUsage {
}
}
-pub fn collect_tiktoken_messages(
- request: LanguageModelRequest,
-) -> Vec<tiktoken_rs::ChatCompletionRequestMessage> {
- request
- .messages
- .into_iter()
- .map(|message| tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(message.string_contents()),
- name: None,
- function_call: None,
- })
- .collect::<Vec<_>>()
-}
-
-/// Count tokens for an OpenAI model. This is synchronous; callers should spawn
-/// it on a background thread if needed.
-pub fn count_open_ai_tokens(request: LanguageModelRequest, model: Model) -> Result<u64> {
- let messages = collect_tiktoken_messages(request);
- match model {
- Model::Custom { max_tokens, .. } => {
- let model = if max_tokens >= 100_000 {
- // If the max tokens is 100k or more, it likely uses the o200k_base tokenizer
- "gpt-4o"
- } else {
- // Otherwise fallback to gpt-4, since only cl100k_base and o200k_base are
- // supported with this tiktoken method
- "gpt-4"
- };
- tiktoken_rs::num_tokens_from_messages(model, &messages)
- }
- // Currently supported by tiktoken_rs
- // Sometimes tiktoken-rs is behind on model support. If that is the case, make a new branch
- // arm with an override. We enumerate all supported models here so that we can check if new
- // models are supported yet or not.
- Model::ThreePointFiveTurbo
- | Model::Four
- | Model::FourTurbo
- | Model::FourOmniMini
- | Model::FourPointOneNano
- | Model::O1
- | Model::O3
- | Model::O3Mini
- | Model::Five
- | Model::FiveCodex
- | Model::FiveMini
- | Model::FiveNano => tiktoken_rs::num_tokens_from_messages(model.id(), &messages),
- // GPT-5.1, 5.2, 5.2-codex, 5.3-codex, 5.4, and 5.4-pro don't have dedicated tiktoken support; use gpt-5 tokenizer
- Model::FivePointOne
- | Model::FivePointTwo
- | Model::FivePointTwoCodex
- | Model::FivePointThreeCodex
- | Model::FivePointFour
- | Model::FivePointFourPro => tiktoken_rs::num_tokens_from_messages("gpt-5", &messages),
- }
- .map(|tokens| tokens as u64)
-}
-
#[cfg(test)]
mod tests {
use crate::responses::{
@@ -929,34 +867,6 @@ mod tests {
})
}
- #[test]
- fn tiktoken_rs_support() {
- let request = LanguageModelRequest {
- thread_id: None,
- prompt_id: None,
- intent: None,
- messages: vec![LanguageModelRequestMessage {
- role: Role::User,
- content: vec![MessageContent::Text("message".into())],
- cache: false,
- reasoning_details: None,
- }],
- tools: vec![],
- tool_choice: None,
- stop: vec![],
- temperature: None,
- thinking_allowed: true,
- thinking_effort: None,
- speed: None,
- };
-
- // Validate that all models are supported by tiktoken-rs
- for model in <Model as strum::IntoEnumIterator>::iter() {
- let count = count_open_ai_tokens(request.clone(), model).unwrap();
- assert!(count > 0);
- }
- }
-
#[test]
fn responses_stream_maps_text_and_usage() {
let events = vec![
@@ -8,9 +8,7 @@ use gpui::{
WindowOptions, actions, point, size, transparent_black,
};
use language::{Buffer, LanguageRegistry, language_settings::SoftWrap};
-use language_model::{
- ConfiguredModel, LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage, Role,
-};
+use language_model::{ConfiguredModel, LanguageModelRegistry};
use picker::{Picker, PickerDelegate};
use platform_title_bar::PlatformTitleBar;
use release_channel::ReleaseChannel;
@@ -165,8 +163,6 @@ pub struct RulesLibrary {
struct RuleEditor {
title_editor: Entity<Editor>,
body_editor: Entity<Editor>,
- token_count: Option<u64>,
- pending_token_count: Task<Option<()>>,
next_title_and_body_to_save: Option<(String, Rope)>,
pending_save: Option<Task<Option<()>>>,
_subscriptions: Vec<Subscription>,
@@ -785,13 +781,10 @@ impl RulesLibrary {
body_editor,
next_title_and_body_to_save: None,
pending_save: None,
- token_count: None,
- pending_token_count: Task::ready(None),
_subscriptions,
},
);
this.set_active_rule(Some(prompt_id), window, cx);
- this.count_tokens(prompt_id, window, cx);
}
Err(error) => {
// TODO: we should show the error in the UI.
@@ -1019,7 +1012,6 @@ impl RulesLibrary {
match event {
EditorEvent::BufferEdited => {
self.save_rule(prompt_id, window, cx);
- self.count_tokens(prompt_id, window, cx);
}
EditorEvent::Blurred => {
title_editor.update(cx, |title_editor, cx| {
@@ -1049,7 +1041,6 @@ impl RulesLibrary {
match event {
EditorEvent::BufferEdited => {
self.save_rule(prompt_id, window, cx);
- self.count_tokens(prompt_id, window, cx);
}
EditorEvent::Blurred => {
body_editor.update(cx, |body_editor, cx| {
@@ -1068,59 +1059,6 @@ impl RulesLibrary {
}
}
- fn count_tokens(&mut self, prompt_id: PromptId, window: &mut Window, cx: &mut Context<Self>) {
- let Some(ConfiguredModel { model, .. }) =
- LanguageModelRegistry::read_global(cx).default_model()
- else {
- return;
- };
- if let Some(rule) = self.rule_editors.get_mut(&prompt_id) {
- let editor = &rule.body_editor.read(cx);
- let buffer = &editor.buffer().read(cx).as_singleton().unwrap().read(cx);
- let body = buffer.as_rope().clone();
- rule.pending_token_count = cx.spawn_in(window, async move |this, cx| {
- async move {
- const DEBOUNCE_TIMEOUT: Duration = Duration::from_secs(1);
-
- cx.background_executor().timer(DEBOUNCE_TIMEOUT).await;
- let token_count = cx
- .update(|_, cx| {
- model.count_tokens(
- LanguageModelRequest {
- thread_id: None,
- prompt_id: None,
- intent: None,
- messages: vec![LanguageModelRequestMessage {
- role: Role::System,
- content: vec![body.to_string().into()],
- cache: false,
- reasoning_details: None,
- }],
- tools: Vec::new(),
- tool_choice: None,
- stop: Vec::new(),
- temperature: None,
- thinking_allowed: true,
- thinking_effort: None,
- speed: None,
- },
- cx,
- )
- })?
- .await?;
-
- this.update(cx, |this, cx| {
- let rule_editor = this.rule_editors.get_mut(&prompt_id).unwrap();
- rule_editor.token_count = Some(token_count);
- cx.notify();
- })
- }
- .log_err()
- .await
- });
- }
- }
-
fn render_rule_list(&mut self, cx: &mut Context<Self>) -> impl IntoElement {
v_flex()
.id("rule-list")
@@ -1293,8 +1231,6 @@ impl RulesLibrary {
let rule_metadata = self.store.read(cx).metadata(prompt_id)?;
let rule_editor = &self.rule_editors[&prompt_id];
let focus_handle = rule_editor.body_editor.focus_handle(cx);
- let registry = LanguageModelRegistry::read_global(cx);
- let model = registry.default_model().map(|default| default.model);
let built_in = prompt_id.is_built_in();
Some(
@@ -1318,52 +1254,17 @@ impl RulesLibrary {
built_in,
cx,
))
- .child(
- h_flex()
- .h_full()
- .flex_shrink_0()
- .children(rule_editor.token_count.map(|token_count| {
- let token_count: SharedString =
- token_count.to_string().into();
- let label_token_count: SharedString =
- token_count.to_string().into();
-
- div()
- .id("token_count")
- .mr_1()
- .flex_shrink_0()
- .tooltip(move |_window, cx| {
- Tooltip::with_meta(
- "Token Estimation",
- None,
- format!(
- "Model: {}",
- model
- .as_ref()
- .map(|model| model.name().0)
- .unwrap_or_default()
- ),
- cx,
- )
- })
- .child(
- Label::new(format!(
- "{} tokens",
- label_token_count
- ))
- .color(Color::Muted),
- )
- }))
- .map(|this| {
- if built_in {
- this.child(self.render_built_in_rule_controls())
- } else {
- this.child(self.render_regular_rule_controls(
- rule_metadata.default,
- ))
- }
- }),
- ),
+ .child(h_flex().h_full().flex_shrink_0().map(|this| {
+ if built_in {
+ this.child(self.render_built_in_rule_controls())
+ } else {
+ this.child(
+ self.render_regular_rule_controls(
+ rule_metadata.default,
+ ),
+ )
+ }
+ })),
)
.child(
div()
@@ -17,8 +17,6 @@ schemars = ["dep:schemars"]
[dependencies]
anyhow.workspace = true
-language_model_core.workspace = true
schemars = { workspace = true, optional = true }
serde.workspace = true
strum.workspace = true
-tiktoken-rs.workspace = true
@@ -1,30 +0,0 @@
-use anyhow::Result;
-use language_model_core::{LanguageModelRequest, Role};
-
-use crate::Model;
-
-/// Count tokens for an xAI model using tiktoken. This is synchronous;
-/// callers should spawn it on a background thread if needed.
-pub fn count_xai_tokens(request: LanguageModelRequest, model: Model) -> Result<u64> {
- let messages = request
- .messages
- .into_iter()
- .map(|message| tiktoken_rs::ChatCompletionRequestMessage {
- role: match message.role {
- Role::User => "user".into(),
- Role::Assistant => "assistant".into(),
- Role::System => "system".into(),
- },
- content: Some(message.string_contents()),
- name: None,
- function_call: None,
- })
- .collect::<Vec<_>>();
-
- let model_name = if model.max_token_count() >= 100_000 {
- "gpt-4o"
- } else {
- "gpt-4"
- };
- tiktoken_rs::num_tokens_from_messages(model_name, &messages).map(|tokens| tokens as u64)
-}
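The deleted `x_ai` completion module above is the last copy of the tiktoken estimation pattern that this change removes everywhere. For reference, a minimal standalone sketch of that pattern, using `tiktoken-rs` as it was pinned before this change (the message struct fields match the removed code) and `anyhow` for the error type:

// Sketch only: assumes the tiktoken-rs and anyhow crates as dependencies.
use anyhow::Result;
use tiktoken_rs::{ChatCompletionRequestMessage, num_tokens_from_messages};

fn estimate_tokens(text: &str) -> Result<u64> {
    let messages = vec![ChatCompletionRequestMessage {
        role: "user".into(),
        content: Some(text.to_string()),
        name: None,
        function_call: None,
    }];
    // The removed helpers fell back to the GPT-4 tokenizer for models tiktoken-rs doesn't know about.
    num_tokens_from_messages("gpt-4", &messages).map(|tokens| tokens as u64)
}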
@@ -1,5 +1,3 @@
-pub mod completion;
-
use anyhow::Result;
use serde::{Deserialize, Serialize};
use strum::EnumIter;