From e5c03730115e5578567d0f99edf374dc1296f3ee Mon Sep 17 00:00:00 2001
From: David Kleingeld <davidsk@zed.dev>
Date: Thu, 11 Sep 2025 16:45:42 +0200
Subject: [PATCH] Make rodio audio input compile under windows (#37999)

Follow up on https://github.com/zed-industries/zed/pull/37786

Adds conditional compilation, removing the use of libwebrtc on Windows/FreeBSD.

Those platforms can't compile livekit yet. This removes the microphone and
echo cancellation on them; however, they cannot join calls for the same
reason, so it does not matter.

Documentation and error handling improvements.

Release Notes:

- N/A

---------

Co-authored-by: Richard <richard@zed.dev>
---
 Cargo.lock                                    |  1 +
 crates/audio/Cargo.toml                       |  1 +
 crates/audio/src/audio.rs                     | 54 ++++++++++++-------
 crates/audio/src/audio_settings.rs            |  5 +-
 crates/audio/src/rodio_ext.rs                 |  7 ++-
 .../src/livekit_client/playback.rs            |  2 +
 .../src/livekit_client/playback/source.rs     |  4 +-
 7 files changed, 51 insertions(+), 23 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 1488874d6b2637203e1dfe5a3a73e447a38c3cad..82e1b202cb5ac18870bc0cf825847d711835c9f6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1395,6 +1395,7 @@ dependencies = [
  "serde",
  "settings",
  "smol",
+ "thiserror 2.0.12",
  "util",
  "workspace-hack",
 ]
diff --git a/crates/audio/Cargo.toml b/crates/audio/Cargo.toml
index 08e0df424dcdaa15cfd78fddaf5758fb9b8d7e0b..85274f651417f8df91e2f785056e5ee8da0220de 100644
--- a/crates/audio/Cargo.toml
+++ b/crates/audio/Cargo.toml
@@ -25,6 +25,7 @@ schemars.workspace = true
 serde.workspace = true
 settings.workspace = true
 smol.workspace = true
+thiserror.workspace = true
 util.workspace = true
 workspace-hack.workspace = true
 
diff --git a/crates/audio/src/audio.rs b/crates/audio/src/audio.rs
index 511d00671ae99789610bac1f7e30b63ca29ac480..44f2d5e07d5e7ddb9bbb18925bb8bdbe7581e903 100644
--- a/crates/audio/src/audio.rs
+++ b/crates/audio/src/audio.rs
@@ -1,24 +1,26 @@
 use anyhow::{Context as _, Result};
 use collections::HashMap;
-use gpui::{App, AsyncApp, BackgroundExecutor, BorrowAppContext, Global};
-use libwebrtc::native::apm;
-use log::info;
-use parking_lot::Mutex;
+use gpui::{App, BackgroundExecutor, BorrowAppContext, Global};
+
+#[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
+mod non_windows_and_freebsd_deps {
+    pub(super) use gpui::AsyncApp;
+    pub(super) use libwebrtc::native::apm;
+    pub(super) use log::info;
+    pub(super) use parking_lot::Mutex;
+    pub(super) use rodio::cpal::Sample;
+    pub(super) use rodio::source::{LimitSettings, UniformSourceIterator};
+    pub(super) use std::sync::Arc;
+}
+
+#[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
+use non_windows_and_freebsd_deps::*;
+
 use rodio::{
-    Decoder, OutputStream, OutputStreamBuilder, Source,
-    cpal::Sample,
-    mixer::Mixer,
-    nz,
-    source::{Buffered, LimitSettings, UniformSourceIterator},
+    Decoder, OutputStream, OutputStreamBuilder, Source, mixer::Mixer, nz, source::Buffered,
 };
 use settings::Settings;
-use std::{
-    io::Cursor,
-    num::NonZero,
-    path::PathBuf,
-    sync::{Arc, atomic::Ordering},
-    time::Duration,
-};
+use std::{io::Cursor, num::NonZero, path::PathBuf, sync::atomic::Ordering, time::Duration};
 use util::ResultExt;
 
 mod audio_settings;
@@ -76,6 +78,7 @@ impl Sound {
 pub struct Audio {
     output_handle: Option<OutputStream>,
     output_mixer: Option<Mixer>,
+    #[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
     pub echo_canceller: Arc<Mutex<apm::AudioProcessingModule>>,
     source_cache: HashMap<Sound, Buffered<Decoder<Cursor<Vec<u8>>>>>,
     replays: replays::Replays,
@@ -86,6 +89,10 @@ impl Default for Audio {
         Self {
             output_handle: Default::default(),
             output_mixer: Default::default(),
+            #[cfg(not(any(
+                all(target_os = "windows", target_env = "gnu"),
+                target_os = "freebsd"
+            )))]
             echo_canceller: Arc::new(Mutex::new(apm::AudioProcessingModule::new(
                 true, false, false, false,
             ))),
@@ -110,7 +117,16 @@ impl Audio {
                 mixer.add(rodio::source::Zero::new(CHANNEL_COUNT, SAMPLE_RATE));
                 self.output_mixer = Some(mixer);
 
+                // The webrtc apm is not yet compiling for windows & freebsd
+                #[cfg(not(any(
+                    any(all(target_os = "windows", target_env = "gnu")),
+                    target_os = "freebsd"
+                )))]
                 let echo_canceller = Arc::clone(&self.echo_canceller);
+                #[cfg(not(any(
+                    any(all(target_os = "windows", target_env = "gnu")),
+                    target_os = "freebsd"
+                )))]
                 let source = source.inspect_buffer::<BUFFER_SIZE, _>(move |buffer| {
                     let mut buf: [i16; _] = buffer.map(|s| s.to_sample());
                     echo_canceller
@@ -139,6 +155,7 @@ impl Audio {
         self.replays.replays_to_tar(executor)
     }
 
+    #[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
     pub fn open_microphone(voip_parts: VoipParts) -> anyhow::Result<impl Source> {
         let stream = rodio::microphone::MicrophoneBuilder::new()
             .default_device()?
@@ -174,8 +191,7 @@ impl Audio {
             .periodic_access(Duration::from_millis(100), move |agc_source| {
                 agc_source.set_enabled(LIVE_SETTINGS.control_input_volume.load(Ordering::Relaxed));
             })
-            .replayable(REPLAY_DURATION)
-            .expect("REPLAY_DURATION is longer then 100ms");
+            .replayable(REPLAY_DURATION)?;
 
         voip_parts
             .replays
@@ -249,11 +265,13 @@ impl Audio {
     }
 }
 
+#[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
 pub struct VoipParts {
     echo_canceller: Arc<Mutex<apm::AudioProcessingModule>>,
     replays: replays::Replays,
 }
 
+#[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
 impl VoipParts {
     pub fn new(cx: &AsyncApp) -> anyhow::Result<Self> {
         let (apm, replays) = cx.try_read_default_global::<Audio, _>(|audio, _| {
diff --git a/crates/audio/src/audio_settings.rs b/crates/audio/src/audio_settings.rs
index 43edb8d60d96122d5515ec7274a6b5725b247ca0..ea0ea5f3558e015f5579cca43eeb8c529273cb52 100644
--- a/crates/audio/src/audio_settings.rs
+++ b/crates/audio/src/audio_settings.rs
@@ -59,6 +59,7 @@ impl Settings for AudioSettings {
     fn import_from_vscode(_vscode: &settings::VsCodeSettings, _current: &mut Self::FileContent) {}
 }
 
+/// See docs on [LIVE_SETTINGS]
 pub(crate) struct LiveSettings {
     pub(crate) control_input_volume: AtomicBool,
     pub(crate) control_output_volume: AtomicBool,
@@ -89,7 +90,9 @@ impl LiveSettings {
 }
 
 /// Allows access to settings from the audio thread. Updated by
-/// observer of SettingsStore.
+/// observer of SettingsStore. Needed because audio playback and recording are
+/// real time and must each run in a dedicated OS thread, therefore we can not
+/// use the background executor.
 pub(crate) static LIVE_SETTINGS: LiveSettings = LiveSettings {
     control_input_volume: AtomicBool::new(true),
     control_output_volume: AtomicBool::new(true),
diff --git a/crates/audio/src/rodio_ext.rs b/crates/audio/src/rodio_ext.rs
index 4e9430a0b9462448b879f653f9ddcb06ef892cdb..ba4e4ff0554dd3c9bc2a7e2691de270c0d00908b 100644
--- a/crates/audio/src/rodio_ext.rs
+++ b/crates/audio/src/rodio_ext.rs
@@ -9,7 +9,8 @@ use std::{
 use crossbeam::queue::ArrayQueue;
 use rodio::{ChannelCount, Sample, SampleRate, Source};
 
-#[derive(Debug)]
+#[derive(Debug, thiserror::Error)]
+#[error("Replay duration is too short must be >= 100ms")]
 pub struct ReplayDurationTooShort;
 
 pub trait RodioExt: Source + Sized {
@@ -338,6 +339,7 @@ impl<S: Source> Iterator for Replayable<S> {
     fn next(&mut self) -> Option<Self::Item> {
         if let Some(sample) = self.inner.next() {
             self.buffer.push(sample);
+            // If the buffer is full send it
             if self.buffer.len() == self.chunk_size {
                 self.tx.push_normal(std::mem::take(&mut self.buffer));
             }
@@ -422,6 +424,9 @@ impl Iterator for Replay {
                 return None;
             }
 
+            // The queue does not support blocking on a next item. We want this queue as it
+            // is quite fast and provides a fixed size. We know how many samples are in a
+            // buffer so if we do not get one now we must be getting one after `sleep_duration`.
             std::thread::sleep(self.sleep_duration);
         }
     }
diff --git a/crates/livekit_client/src/livekit_client/playback.rs b/crates/livekit_client/src/livekit_client/playback.rs
index d1b2cee4aa1750ba4b8af3033e44b1fe9fbe78de..7c866113103a883e7e7a2d9d3f5651d833d7e637 100644
--- a/crates/livekit_client/src/livekit_client/playback.rs
+++ b/crates/livekit_client/src/livekit_client/playback.rs
@@ -186,6 +186,8 @@ impl AudioStack {
         let capture_task = if rodio_pipeline {
             info!("Using experimental.rodio_audio audio pipeline");
             let voip_parts = audio::VoipParts::new(cx)?;
+            // Audio needs to run real-time and should never be paused. That is why we are using a
+            // normal std::thread and not a background task
             thread::spawn(move || {
                 // microphone is non send on mac
                 let microphone = audio::Audio::open_microphone(voip_parts)?;
diff --git a/crates/livekit_client/src/livekit_client/playback/source.rs b/crates/livekit_client/src/livekit_client/playback/source.rs
index 67bfe793902da94a114ca617ce5bfa33c68d02e7..f605b3d517cd816491f0eceadce5ac778ef75d21 100644
--- a/crates/livekit_client/src/livekit_client/playback/source.rs
+++ b/crates/livekit_client/src/livekit_client/playback/source.rs
@@ -12,9 +12,7 @@ fn frame_to_samplesbuffer(frame: AudioFrame) -> SamplesBuffer {
     let samples = SampleTypeConverter::<_, _>::new(samples);
     let samples: Vec<f32> = samples.collect();
     SamplesBuffer::new(
-        // here be dragons
-        // NonZero::new(frame.num_channels as u16).expect("audio frame channels is nonzero"),
-        nz!(2),
+        nz!(2), // frame always has two channels
         NonZero::new(frame.sample_rate).expect("audio frame sample rate is nonzero"),
         samples,
     )