ep: Compress EP requests (#48793)

Oleksiy Syvokon created

Benchmark results on 3,000 captured requests:

| Scenario          | Mean   | Savings | p95    | Savings |
|:------------------|-------:|--------:|-------:|--------:|
| Uncompressed      | 12,275 |       — | 35,896 |       — |
| zstd (no dict)    |  2,784 |  77.3%  |  7,193 |  80.0%  |
| zstd + global dict|  2,544 |  79.3%  |  6,862 |  80.9%  |
| brotli (no dict)  |  2,650 |  78.4%  |  6,857 |  80.9%  |

A trained zstd dictionary improves mean savings by only about 2 percentage
points over plain zstd (77.3% → 79.3%), which is not worth the added complexity.

Brotli achieves compression comparable to zstd but is 4 times slower.

Release Notes:

- N/A

Change summary

Cargo.lock                                          | 1 +
crates/edit_prediction/Cargo.toml                   | 1 +
crates/edit_prediction/src/edit_prediction.rs       | 6 +++++-
crates/edit_prediction/src/edit_prediction_tests.rs | 3 ++-
4 files changed, 9 insertions(+), 2 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -5353,6 +5353,7 @@ dependencies = [
  "zed_actions",
  "zeta_prompt",
  "zlog",
+ "zstd",
 ]
 
 [[package]]

crates/edit_prediction/Cargo.toml 🔗

@@ -65,6 +65,7 @@ workspace.workspace = true
 worktree.workspace = true
 zed_actions.workspace = true
 zeta_prompt.workspace = true
+zstd.workspace = true
 
 [dev-dependencies]
 clock = { workspace = true, features = ["test-support"] }

crates/edit_prediction/src/edit_prediction.rs 🔗

@@ -1944,11 +1944,15 @@ impl EditPredictionStore {
 
         let request = PredictEditsV3Request { input, trigger };
 
+        let json_bytes = serde_json::to_vec(&request)?;
+        let compressed = zstd::encode_all(&json_bytes[..], 3)?;
+
         Self::send_api_request(
             |builder| {
                 let req = builder
                     .uri(url.as_ref())
-                    .body(serde_json::to_string(&request)?.into());
+                    .header("Content-Encoding", "zstd")
+                    .body(compressed.clone().into());
                 Ok(req?)
             },
             client,

crates/edit_prediction/src/edit_prediction_tests.rs 🔗

@@ -1380,7 +1380,8 @@ fn init_test_with_fake_client(
                         "/predict_edits/v3" => {
                             let mut buf = Vec::new();
                             body.read_to_end(&mut buf).await.ok();
-                            let req = serde_json::from_slice(&buf).unwrap();
+                            let decompressed = zstd::decode_all(&buf[..]).unwrap();
+                            let req = serde_json::from_slice(&decompressed).unwrap();
 
                             let (res_tx, res_rx) = oneshot::channel();
                             predict_req_tx.unbounded_send((req, res_tx)).unwrap();