Send up Zed version with edit prediction and completion requests (#30136)

Marshall Bowers created

This PR adds an `x-zed-version` header, containing the client's version, to
requests made to llm.zed.dev for edit predictions and completions.

Release Notes:

- N/A

Change summary

Cargo.lock                                   |  5 +++--
Cargo.toml                                   |  2 +-
crates/agent/src/thread.rs                   |  4 ++--
crates/language_models/Cargo.toml            |  1 +
crates/language_models/src/provider/cloud.rs | 20 +++++++++++++++++---
crates/zeta/src/zeta.rs                      |  3 ++-
6 files changed, 26 insertions(+), 9 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -7904,6 +7904,7 @@ dependencies = [
  "partial-json-fixer",
  "project",
  "proto",
+ "release_channel",
  "schemars",
  "serde",
  "serde_json",
@@ -18883,9 +18884,9 @@ dependencies = [
 
 [[package]]
 name = "zed_llm_client"
-version = "0.7.5"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6fe0d60001c02d0d21a4114a13bee3a905fbb9e146ada80a90435c05fda18852"
+checksum = "a23b2fd00776b0c55072f389654910ceb501eb0083d7f78905ab0e5cc86949ec"
 dependencies = [
  "anyhow",
  "serde",

Cargo.toml 🔗

@@ -610,7 +610,7 @@ wasmtime-wasi = "29"
 which = "6.0.0"
 wit-component = "0.221"
 workspace-hack = "0.1.0"
-zed_llm_client = "0.7.5"
+zed_llm_client = "0.8.0"
 zstd = "0.11"
 
 [workspace.dependencies.async-stripe]

crates/agent/src/thread.rs 🔗

@@ -1546,9 +1546,9 @@ impl Thread {
                                             completion.queue_state =  QueueState::Started;
                                         }
                                         CompletionRequestStatus::Failed {
-                                            code, message
+                                            code, message, request_id
                                         } => {
-                                            return Err(anyhow!("completion request failed. code: {code}, message: {message}"));
+                                            return Err(anyhow!("completion request failed. request_id: {request_id}, code: {code}, message: {message}"));
                                         }
                                         CompletionRequestStatus::UsageUpdated {
                                             amount, limit

crates/language_models/Cargo.toml 🔗

@@ -41,6 +41,7 @@ open_ai = { workspace = true, features = ["schemars"] }
 partial-json-fixer.workspace = true
 project.workspace = true
 proto.workspace = true
+release_channel.workspace = true
 schemars.workspace = true
 serde.workspace = true
 serde_json.workspace = true

crates/language_models/src/provider/cloud.rs 🔗

@@ -6,7 +6,9 @@ use feature_flags::{FeatureFlagAppExt, LlmClosedBetaFeatureFlag, ZedProFeatureFl
 use futures::{
     AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
 };
-use gpui::{AnyElement, AnyView, App, AsyncApp, Context, Entity, Subscription, Task};
+use gpui::{
+    AnyElement, AnyView, App, AsyncApp, Context, Entity, SemanticVersion, Subscription, Task,
+};
 use http_client::{AsyncBody, HttpClient, Method, Response, StatusCode};
 use language_model::{
     AuthenticateError, CloudModel, LanguageModel, LanguageModelCacheConfiguration,
@@ -20,6 +22,7 @@ use language_model::{
     MaxMonthlySpendReachedError, PaymentRequiredError, RefreshLlmTokenListener,
 };
 use proto::Plan;
+use release_channel::AppVersion;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize, de::DeserializeOwned};
 use settings::{Settings, SettingsStore};
@@ -39,7 +42,7 @@ use zed_llm_client::{
     CompletionRequestStatus, CountTokensBody, CountTokensResponse, EXPIRED_LLM_TOKEN_HEADER_NAME,
     MAX_LLM_MONTHLY_SPEND_REACHED_HEADER_NAME, MODEL_REQUESTS_RESOURCE_HEADER_VALUE,
     SERVER_SUPPORTS_STATUS_MESSAGES_HEADER_NAME, SUBSCRIPTION_LIMIT_RESOURCE_HEADER_NAME,
-    TOOL_USE_LIMIT_REACHED_HEADER_NAME,
+    TOOL_USE_LIMIT_REACHED_HEADER_NAME, ZED_VERSION_HEADER_NAME,
 };
 
 use crate::AllLanguageModelSettings;
@@ -526,6 +529,7 @@ impl CloudLanguageModel {
     async fn perform_llm_completion(
         client: Arc<Client>,
         llm_api_token: LlmApiToken,
+        app_version: Option<SemanticVersion>,
         body: CompletionBody,
     ) -> Result<PerformLlmCompletionResponse> {
         let http_client = &client.http_client();
@@ -542,6 +546,12 @@ impl CloudLanguageModel {
             } else {
                 request_builder.uri(http_client.build_zed_llm_url("/completions", &[])?.as_ref())
             };
+            let request_builder = if let Some(app_version) = app_version {
+                request_builder.header(ZED_VERSION_HEADER_NAME, app_version.to_string())
+            } else {
+                request_builder
+            };
+
             let request = request_builder
                 .header("Content-Type", "application/json")
                 .header("Authorization", format!("Bearer {token}"))
@@ -774,7 +784,7 @@ impl LanguageModel for CloudLanguageModel {
     fn stream_completion(
         &self,
         request: LanguageModelRequest,
-        _cx: &AsyncApp,
+        cx: &AsyncApp,
     ) -> BoxFuture<
         'static,
         Result<
@@ -784,6 +794,7 @@ impl LanguageModel for CloudLanguageModel {
         let thread_id = request.thread_id.clone();
         let prompt_id = request.prompt_id.clone();
         let mode = request.mode;
+        let app_version = cx.update(|cx| AppVersion::global(cx)).ok();
         match &self.model {
             CloudModel::Anthropic(model) => {
                 let request = into_anthropic(
@@ -804,6 +815,7 @@ impl LanguageModel for CloudLanguageModel {
                     } = Self::perform_llm_completion(
                         client.clone(),
                         llm_api_token,
+                        app_version,
                         CompletionBody {
                             thread_id,
                             prompt_id,
@@ -855,6 +867,7 @@ impl LanguageModel for CloudLanguageModel {
                     } = Self::perform_llm_completion(
                         client.clone(),
                         llm_api_token,
+                        app_version,
                         CompletionBody {
                             thread_id,
                             prompt_id,
@@ -891,6 +904,7 @@ impl LanguageModel for CloudLanguageModel {
                     } = Self::perform_llm_completion(
                         client.clone(),
                         llm_api_token,
+                        app_version,
                         CompletionBody {
                             thread_id,
                             prompt_id,

crates/zeta/src/zeta.rs 🔗

@@ -55,7 +55,7 @@ use workspace::notifications::{ErrorMessagePrompt, NotificationId};
 use worktree::Worktree;
 use zed_llm_client::{
     EXPIRED_LLM_TOKEN_HEADER_NAME, MINIMUM_REQUIRED_VERSION_HEADER_NAME, PredictEditsBody,
-    PredictEditsResponse,
+    PredictEditsResponse, ZED_VERSION_HEADER_NAME,
 };
 
 const CURSOR_MARKER: &'static str = "<|user_cursor_is_here|>";
@@ -754,6 +754,7 @@ and then another
                 let request = request_builder
                     .header("Content-Type", "application/json")
                     .header("Authorization", format!("Bearer {}", token))
+                    .header(ZED_VERSION_HEADER_NAME, app_version.to_string())
                     .body(serde_json::to_string(&body)?.into())?;
 
                 let mut response = http_client.send(request).await?;