@@ -22,6 +22,7 @@ pub(crate) fn init(client: &AnyProtoClient) {
pub struct ChannelBuffer {
pub channel_id: ChannelId,
connected: bool,
+ rejoining: bool,
collaborators: HashMap<PeerId, Collaborator>,
user_store: Entity<UserStore>,
channel_store: Entity<ChannelStore>,
@@ -84,6 +85,7 @@ impl ChannelBuffer {
buffer_epoch: response.epoch,
client,
connected: true,
+ rejoining: false,
collaborators: Default::default(),
acknowledge_task: None,
channel_id: channel.id,
@@ -111,6 +113,7 @@ impl ChannelBuffer {
pub fn connected(&mut self, cx: &mut Context<Self>) {
self.connected = true;
+ self.rejoining = false;
if self.subscription.is_none() {
let Ok(subscription) = self.client.subscribe_to_entity(self.channel_id.0) else {
return;
@@ -120,6 +123,10 @@ impl ChannelBuffer {
}
}
+ pub(crate) fn set_rejoining(&mut self, rejoining: bool) {
+ self.rejoining = rejoining;
+ }
+
pub fn remote_id(&self, cx: &App) -> BufferId {
self.buffer.read(cx).remote_id()
}
@@ -204,6 +211,9 @@ impl ChannelBuffer {
return;
}
let operation = language::proto::serialize_operation(operation);
+        if self.rejoining {
+            return; // dropped here; the rejoin flow re-sends any ops newer than the server's version
+        }
self.client
.send(proto::UpdateChannelBuffer {
channel_id: self.channel_id.0,
@@ -263,6 +273,7 @@ impl ChannelBuffer {
log::info!("channel buffer {} disconnected", self.channel_id);
if self.connected {
self.connected = false;
+ self.rejoining = false;
self.subscription.take();
cx.emit(ChannelBufferEvent::Disconnected);
cx.notify()
@@ -855,12 +855,18 @@ impl ChannelStore {
if let OpenEntityHandle::Open(buffer) = buffer
&& let Some(buffer) = buffer.upgrade()
{
- let channel_buffer = buffer.read(cx);
- let buffer = channel_buffer.buffer().read(cx);
- buffer_versions.push(proto::ChannelBufferVersion {
- channel_id: channel_buffer.channel_id.0,
- epoch: channel_buffer.epoch(),
- version: language::proto::serialize_version(&buffer.version()),
+ buffer.update(cx, |channel_buffer, cx| {
+ // Block on_buffer_update from sending UpdateChannelBuffer messages
+ // until the rejoin completes. This prevents a race condition where
+ // edits made during the rejoin async gap could inflate the server
+ // version, causing offline edits to be filtered out by serialize_ops.
+ channel_buffer.set_rejoining(true);
+ let inner_buffer = channel_buffer.buffer().read(cx);
+ buffer_versions.push(proto::ChannelBufferVersion {
+ channel_id: channel_buffer.channel_id.0,
+ epoch: channel_buffer.epoch(),
+ version: language::proto::serialize_version(&inner_buffer.version()),
+ });
});
}
}
@@ -874,7 +880,26 @@ impl ChannelStore {
});
cx.spawn(async move |this, cx| {
- let mut response = response.await?;
+            let mut response = match response.await {
+                Ok(response) => response,
+                Err(err) => {
+                    // Clear rejoining flag on all buffers since the rejoin failed
+                    this.update(cx, |this, cx| {
+                        for buffer in this.opened_buffers.values() {
+                            if let OpenEntityHandle::Open(buffer) = buffer {
+                                if let Some(buffer) = buffer.upgrade() {
+                                    buffer.update(cx, |channel_buffer, _| {
+                                        channel_buffer.set_rejoining(false);
+                                    });
+                                }
+                            }
+                        }
+                    })
+                    .ok();
+                    return Err(err);
+                }
+            };
+
this.update(cx, |this, cx| {
this.opened_buffers.retain(|_, buffer| match buffer {
@@ -948,6 +973,22 @@ impl ChannelStore {
fn handle_disconnect(&mut self, wait_for_reconnect: bool, cx: &mut Context<Self>) {
cx.notify();
self.did_subscribe = false;
+
+ // If we're waiting for reconnect, set rejoining=true on all buffers immediately.
+ // This prevents operations from being sent during the reconnection window,
+ // before handle_connect has a chance to run and capture the version.
+ if wait_for_reconnect {
+ for buffer in self.opened_buffers.values() {
+ if let OpenEntityHandle::Open(buffer) = buffer {
+ if let Some(buffer) = buffer.upgrade() {
+ buffer.update(cx, |channel_buffer, _| {
+ channel_buffer.set_rejoining(true);
+ });
+ }
+ }
+ }
+ }
+
self.disconnect_channel_buffers_task.get_or_insert_with(|| {
cx.spawn(async move |this, cx| {
if wait_for_reconnect {
@@ -3,6 +3,7 @@ use call::ActiveCall;
use channel::ACKNOWLEDGE_DEBOUNCE_INTERVAL;
use client::{Collaborator, ParticipantIndex, UserId};
use collab::rpc::{CLEANUP_TIMEOUT, RECONNECT_TIMEOUT};
+
use collab_ui::channel_view::ChannelView;
use collections::HashMap;
use editor::{Anchor, Editor, MultiBufferOffset, ToOffset};
@@ -698,6 +699,165 @@ async fn test_channel_buffer_changes_persist(
});
}
+#[gpui::test]
+async fn test_channel_buffer_operations_lost_on_reconnect(
+ executor: BackgroundExecutor,
+ cx_a: &mut TestAppContext,
+ cx_b: &mut TestAppContext,
+) {
+ let mut server = TestServer::start(executor.clone()).await;
+ let client_a = server.create_client(cx_a, "user_a").await;
+ let client_b = server.create_client(cx_b, "user_b").await;
+
+ let channel_id = server
+ .make_channel(
+ "the-channel",
+ None,
+ (&client_a, cx_a),
+ &mut [(&client_b, cx_b)],
+ )
+ .await;
+
+ // Both clients open the channel buffer.
+ let channel_buffer_a = client_a
+ .channel_store()
+ .update(cx_a, |store, cx| store.open_channel_buffer(channel_id, cx))
+ .await
+ .unwrap();
+ let channel_buffer_b = client_b
+ .channel_store()
+ .update(cx_b, |store, cx| store.open_channel_buffer(channel_id, cx))
+ .await
+ .unwrap();
+
+ // Step 1: Client A makes an initial edit that syncs to B.
+ channel_buffer_a.update(cx_a, |buffer, cx| {
+ buffer.buffer().update(cx, |buffer, cx| {
+ buffer.edit([(0..0, "a")], None, cx);
+ })
+ });
+ executor.run_until_parked();
+
+ // Verify both clients see "a".
+ channel_buffer_a.read_with(cx_a, |buffer, cx| {
+ assert_eq!(buffer.buffer().read(cx).text(), "a");
+ });
+ channel_buffer_b.read_with(cx_b, |buffer, cx| {
+ assert_eq!(buffer.buffer().read(cx).text(), "a");
+ });
+
+ // Step 2: Disconnect client A. Do NOT advance past RECONNECT_TIMEOUT
+ // so that the buffer stays in `opened_buffers` for rejoin.
+ server.forbid_connections();
+ server.disconnect_client(client_a.peer_id().unwrap());
+ executor.run_until_parked();
+
+ // Step 3: While disconnected, client A makes an offline edit ("b").
+ // on_buffer_update fires but client.send() fails because transport is down.
+ channel_buffer_a.update(cx_a, |buffer, cx| {
+ buffer.buffer().update(cx, |buffer, cx| {
+ buffer.edit([(1..1, "b")], None, cx);
+ })
+ });
+ executor.run_until_parked();
+
+ // Client A sees "ab" locally; B still sees "a".
+ channel_buffer_a.read_with(cx_a, |buffer, cx| {
+ assert_eq!(buffer.buffer().read(cx).text(), "ab");
+ });
+ channel_buffer_b.read_with(cx_b, |buffer, cx| {
+ assert_eq!(buffer.buffer().read(cx).text(), "a");
+ });
+
+ // Step 4: Reconnect and make a racing edit in parallel.
+ //
+ // The race condition occurs when:
+ // 1. Transport reconnects, handle_connect captures version V (with "b") and sends RejoinChannelBuffers
+ // 2. DURING the async gap (awaiting response), user makes edit "c"
+ // 3. on_buffer_update sends UpdateChannelBuffer (succeeds because transport is up)
+ // 4. Server receives BOTH messages concurrently (FuturesUnordered)
+ // 5. If UpdateChannelBuffer commits first, server version is inflated to include "c"
+ // 6. RejoinChannelBuffers reads inflated version and sends it back
+ // 7. Client's serialize_ops(inflated_version) filters out "b" (offline edit)
+ // because the inflated version's timestamp covers "b"'s timestamp
+
+ // Get the buffer handle for spawning
+    let buffer_for_edit = channel_buffer_a.read_with(cx_a, |buffer, _| buffer.buffer().clone());
+
+ // Spawn the edit task - it will wait for executor to run it
+ let edit_task = cx_a.spawn({
+ let buffer = buffer_for_edit;
+        async move |cx| {
+            let _ = buffer.update(cx, |buffer, cx| {
+ buffer.edit([(2..2, "c")], None, cx);
+ });
+ }
+ });
+
+ // Allow connections so reconnect can succeed
+ server.allow_connections();
+
+ // Advance clock to trigger reconnection attempt
+ executor.advance_clock(RECEIVE_TIMEOUT);
+
+    // Detach the edit task so the executor can run it, racing with handle_connect's rejoin request
+ edit_task.detach();
+
+ // Let everything settle.
+ executor.run_until_parked();
+
+ // Step 7: Read final buffer text from both clients.
+ let text_a = channel_buffer_a.read_with(cx_a, |buffer, cx| buffer.buffer().read(cx).text());
+ let text_b = channel_buffer_b.read_with(cx_b, |buffer, cx| buffer.buffer().read(cx).text());
+
+ // Both clients must see the same text containing all three edits.
+ assert_eq!(
+ text_a, text_b,
+ "Client A and B diverged! A sees {:?}, B sees {:?}. \
+ Operations were lost during reconnection.",
+ text_a, text_b
+ );
+ assert!(
+ text_a.contains('a'),
+ "Initial edit 'a' missing from final text {:?}",
+ text_a
+ );
+ assert!(
+ text_a.contains('b'),
+ "Offline edit 'b' missing from final text {:?}. \
+ This is the reconnection race bug: the offline operation was \
+ filtered out by serialize_ops because the server_version was \
+ inflated by a racing UpdateChannelBuffer.",
+ text_a
+ );
+ assert!(
+ text_a.contains('c'),
+ "Racing edit 'c' missing from final text {:?}",
+ text_a
+ );
+
+ // Step 8: Verify the invariant directly — every operation known to
+ // client A must be observed by client B's version. If any operation
+ // in A's history is not covered by B's version, it was lost.
+ channel_buffer_a.read_with(cx_a, |buf_a, cx_a_inner| {
+ let buffer_a = buf_a.buffer().read(cx_a_inner);
+ let ops_a = buffer_a.operations();
+ channel_buffer_b.read_with(cx_b, |buf_b, cx_b_inner| {
+ let buffer_b = buf_b.buffer().read(cx_b_inner);
+ let version_b = buffer_b.version();
+ for (lamport, _op) in ops_a.iter() {
+ assert!(
+ version_b.observed(*lamport),
+ "Operation with lamport timestamp {:?} from client A \
+ is NOT observed by client B's version. This operation \
+ was lost during reconnection.",
+ lamport
+ );
+ }
+ });
+ });
+}
+
#[track_caller]
fn assert_collaborators(collaborators: &HashMap<PeerId, Collaborator>, ids: &[Option<UserId>]) {
let mut user_ids = collaborators