Always double reconnection delay and add jitter (#35337)

Antonio Scandurra and Marshall Bowers created

Previously, we would pick an exponent between 0.5 and 2.5, which would
cause a lot of clients to try reconnecting in rapid succession,
overwhelming the server as a result.

This pull request always doubles the previous delay and introduces a
jitter that can, at most, double it.

As part of this, we're also increasing the maximum reconnection delay
from 10s to 30s: this gives us more space to spread out the reconnection
requests.

Release Notes:

- N/A

---------

Co-authored-by: Marshall Bowers <git@maxdeviant.com>

Change summary

crates/client/src/client.rs                  | 13 ++++++-------
crates/collab/src/tests/integration_tests.rs |  4 ++--
2 files changed, 8 insertions(+), 9 deletions(-)

Detailed changes

crates/client/src/client.rs 🔗

@@ -31,7 +31,6 @@ use rpc::proto::{AnyTypedEnvelope, EnvelopedMessage, PeerId, RequestMessage};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use settings::{Settings, SettingsSources};
-use std::pin::Pin;
 use std::{
     any::TypeId,
     convert::TryFrom,
@@ -45,6 +44,7 @@ use std::{
     },
     time::{Duration, Instant},
 };
+use std::{cmp, pin::Pin};
 use telemetry::Telemetry;
 use thiserror::Error;
 use tokio::net::TcpStream;
@@ -78,7 +78,7 @@ pub static ZED_ALWAYS_ACTIVE: LazyLock<bool> =
     LazyLock::new(|| std::env::var("ZED_ALWAYS_ACTIVE").map_or(false, |e| !e.is_empty()));
 
 pub const INITIAL_RECONNECTION_DELAY: Duration = Duration::from_millis(500);
-pub const MAX_RECONNECTION_DELAY: Duration = Duration::from_secs(10);
+pub const MAX_RECONNECTION_DELAY: Duration = Duration::from_secs(30);
 pub const CONNECTION_TIMEOUT: Duration = Duration::from_secs(20);
 
 actions!(
@@ -727,11 +727,10 @@ impl Client {
                                 },
                                 &cx,
                             );
-                            cx.background_executor().timer(delay).await;
-                            delay = delay
-                                .mul_f32(rng.gen_range(0.5..=2.5))
-                                .max(INITIAL_RECONNECTION_DELAY)
-                                .min(MAX_RECONNECTION_DELAY);
+                            let jitter =
+                                Duration::from_millis(rng.gen_range(0..delay.as_millis() as u64));
+                            cx.background_executor().timer(delay + jitter).await;
+                            delay = cmp::min(delay * 2, MAX_RECONNECTION_DELAY);
                         } else {
                             break;
                         }

crates/collab/src/tests/integration_tests.rs 🔗

@@ -842,7 +842,7 @@ async fn test_client_disconnecting_from_room(
 
     // Allow user A to reconnect to the server.
     server.allow_connections();
-    executor.advance_clock(RECEIVE_TIMEOUT);
+    executor.advance_clock(RECONNECT_TIMEOUT);
 
     // Call user B again from client A.
     active_call_a
@@ -1358,7 +1358,7 @@ async fn test_calls_on_multiple_connections(
 
     // User A reconnects automatically, then calls user B again.
     server.allow_connections();
-    executor.advance_clock(RECEIVE_TIMEOUT);
+    executor.advance_clock(RECONNECT_TIMEOUT);
     active_call_a
         .update(cx_a, |call, cx| {
             call.invite(client_b1.user_id().unwrap(), None, cx)