Make rodio audio input compile under windows (#37999)

Created by David Kleingeld and Richard

Follow-up to https://github.com/zed-industries/zed/pull/37786

Adds conditional compilation that removes the use of libwebrtc on Windows (GNU toolchain) and FreeBSD.

Those platforms cannot compile livekit yet. This removes microphone support and
echo cancellation on them; however, they also cannot join calls for the same
reason, so nothing is lost.

Also includes documentation and error handling improvements.
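
For context, a minimal sketch of the cfg-gating pattern used here; the `echo_cancellation` module and `supported` helper below are illustrative only, while the PR itself gates the real imports, struct fields, and functions directly:

```rust
// Illustrative sketch of the cfg gating this PR applies: one predicate
// excludes everything that needs libwebrtc on windows-gnu and FreeBSD.
#[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
mod echo_cancellation {
    // On supported platforms this module would pull in libwebrtc's APM.
    pub fn supported() -> bool {
        true
    }
}

#[cfg(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd"))]
mod echo_cancellation {
    // Stub for platforms where libwebrtc does not compile yet.
    pub fn supported() -> bool {
        false
    }
}

fn main() {
    println!("echo cancellation available: {}", echo_cancellation::supported());
}
```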

Release Notes:

- N/A

---------

Co-authored-by: Richard <richard@zed.dev>

Change summary

Cargo.lock                                                  |  1 
crates/audio/Cargo.toml                                     |  1 
crates/audio/src/audio.rs                                   | 54 ++++--
crates/audio/src/audio_settings.rs                          |  5 
crates/audio/src/rodio_ext.rs                               |  7 
crates/livekit_client/src/livekit_client/playback.rs        |  2 
crates/livekit_client/src/livekit_client/playback/source.rs |  4 
7 files changed, 51 insertions(+), 23 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -1395,6 +1395,7 @@ dependencies = [
  "serde",
  "settings",
  "smol",
+ "thiserror 2.0.12",
  "util",
  "workspace-hack",
 ]

crates/audio/Cargo.toml 🔗

@@ -25,6 +25,7 @@ schemars.workspace = true
 serde.workspace = true
 settings.workspace = true
 smol.workspace = true
+thiserror.workspace = true
 util.workspace = true
 workspace-hack.workspace = true
 

crates/audio/src/audio.rs 🔗

@@ -1,24 +1,26 @@
 use anyhow::{Context as _, Result};
 use collections::HashMap;
-use gpui::{App, AsyncApp, BackgroundExecutor, BorrowAppContext, Global};
-use libwebrtc::native::apm;
-use log::info;
-use parking_lot::Mutex;
+use gpui::{App, BackgroundExecutor, BorrowAppContext, Global};
+
+#[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
+mod non_windows_and_freebsd_deps {
+    pub(super) use gpui::AsyncApp;
+    pub(super) use libwebrtc::native::apm;
+    pub(super) use log::info;
+    pub(super) use parking_lot::Mutex;
+    pub(super) use rodio::cpal::Sample;
+    pub(super) use rodio::source::{LimitSettings, UniformSourceIterator};
+    pub(super) use std::sync::Arc;
+}
+
+#[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
+use non_windows_and_freebsd_deps::*;
+
 use rodio::{
-    Decoder, OutputStream, OutputStreamBuilder, Source,
-    cpal::Sample,
-    mixer::Mixer,
-    nz,
-    source::{Buffered, LimitSettings, UniformSourceIterator},
+    Decoder, OutputStream, OutputStreamBuilder, Source, mixer::Mixer, nz, source::Buffered,
 };
 use settings::Settings;
-use std::{
-    io::Cursor,
-    num::NonZero,
-    path::PathBuf,
-    sync::{Arc, atomic::Ordering},
-    time::Duration,
-};
+use std::{io::Cursor, num::NonZero, path::PathBuf, sync::atomic::Ordering, time::Duration};
 use util::ResultExt;
 
 mod audio_settings;
@@ -76,6 +78,7 @@ impl Sound {
 pub struct Audio {
     output_handle: Option<OutputStream>,
     output_mixer: Option<Mixer>,
+    #[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
     pub echo_canceller: Arc<Mutex<apm::AudioProcessingModule>>,
     source_cache: HashMap<Sound, Buffered<Decoder<Cursor<Vec<u8>>>>>,
     replays: replays::Replays,
@@ -86,6 +89,10 @@ impl Default for Audio {
         Self {
             output_handle: Default::default(),
             output_mixer: Default::default(),
+            #[cfg(not(any(
+                all(target_os = "windows", target_env = "gnu"),
+                target_os = "freebsd"
+            )))]
             echo_canceller: Arc::new(Mutex::new(apm::AudioProcessingModule::new(
                 true, false, false, false,
             ))),
@@ -110,7 +117,16 @@ impl Audio {
                 mixer.add(rodio::source::Zero::new(CHANNEL_COUNT, SAMPLE_RATE));
                 self.output_mixer = Some(mixer);
 
+                // The webrtc apm is not yet compiling for windows & freebsd
+                #[cfg(not(any(
+                    any(all(target_os = "windows", target_env = "gnu")),
+                    target_os = "freebsd"
+                )))]
                 let echo_canceller = Arc::clone(&self.echo_canceller);
+                #[cfg(not(any(
+                    any(all(target_os = "windows", target_env = "gnu")),
+                    target_os = "freebsd"
+                )))]
                 let source = source.inspect_buffer::<BUFFER_SIZE, _>(move |buffer| {
                     let mut buf: [i16; _] = buffer.map(|s| s.to_sample());
                     echo_canceller
@@ -139,6 +155,7 @@ impl Audio {
         self.replays.replays_to_tar(executor)
     }
 
+    #[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
     pub fn open_microphone(voip_parts: VoipParts) -> anyhow::Result<impl Source> {
         let stream = rodio::microphone::MicrophoneBuilder::new()
             .default_device()?
@@ -174,8 +191,7 @@ impl Audio {
             .periodic_access(Duration::from_millis(100), move |agc_source| {
                 agc_source.set_enabled(LIVE_SETTINGS.control_input_volume.load(Ordering::Relaxed));
             })
-            .replayable(REPLAY_DURATION)
-            .expect("REPLAY_DURATION is longer then 100ms");
+            .replayable(REPLAY_DURATION)?;
 
         voip_parts
             .replays
@@ -249,11 +265,13 @@ impl Audio {
     }
 }
 
+#[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
 pub struct VoipParts {
     echo_canceller: Arc<Mutex<apm::AudioProcessingModule>>,
     replays: replays::Replays,
 }
 
+#[cfg(not(any(all(target_os = "windows", target_env = "gnu"), target_os = "freebsd")))]
 impl VoipParts {
     pub fn new(cx: &AsyncApp) -> anyhow::Result<Self> {
         let (apm, replays) = cx.try_read_default_global::<Audio, _>(|audio, _| {

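The `.expect(...)` call above becomes a plain `?` because `ReplayDurationTooShort` now implements `std::error::Error` via thiserror (see the rodio_ext.rs diff below), so it converts into `anyhow::Error` automatically. A minimal sketch under that assumption, with a simplified stand-in for `replayable`:

```rust
use anyhow::Result;
use std::time::Duration;

// Error type as added in rodio_ext.rs; deriving thiserror::Error gives it
// Display and std::error::Error implementations.
#[derive(Debug, thiserror::Error)]
#[error("Replay duration is too short must be >= 100ms")]
pub struct ReplayDurationTooShort;

// Simplified stand-in for the real replayable adapter, which rejects
// durations shorter than 100ms.
fn replayable(duration: Duration) -> std::result::Result<(), ReplayDurationTooShort> {
    if duration < Duration::from_millis(100) {
        return Err(ReplayDurationTooShort);
    }
    Ok(())
}

fn open_microphone() -> Result<()> {
    // `?` now converts ReplayDurationTooShort into anyhow::Error instead of panicking.
    replayable(Duration::from_secs(30))?;
    Ok(())
}

fn main() -> Result<()> {
    open_microphone()
}
```
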
crates/audio/src/audio_settings.rs 🔗

@@ -59,6 +59,7 @@ impl Settings for AudioSettings {
     fn import_from_vscode(_vscode: &settings::VsCodeSettings, _current: &mut Self::FileContent) {}
 }
 
+/// See docs on [LIVE_SETTINGS]
 pub(crate) struct LiveSettings {
     pub(crate) control_input_volume: AtomicBool,
     pub(crate) control_output_volume: AtomicBool,
@@ -89,7 +90,9 @@ impl LiveSettings {
 }
 
 /// Allows access to settings from the audio thread. Updated by
-/// observer of SettingsStore.
+/// observer of SettingsStore. Needed because audio playback and recording are
+/// real time and must each run in a dedicated OS thread, therefore we can not
+/// use the background executor.
 pub(crate) static LIVE_SETTINGS: LiveSettings = LiveSettings {
     control_input_volume: AtomicBool::new(true),
     control_output_volume: AtomicBool::new(true),

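A trimmed-down, runnable sketch of that pattern (only `control_input_volume` is shown; in the real crate the writer is an observer of the SettingsStore):

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::{thread, time::Duration};

// Static with atomic fields: the settings observer writes it, the dedicated
// real-time audio thread reads it without locks or an executor.
struct LiveSettings {
    control_input_volume: AtomicBool,
}

static LIVE_SETTINGS: LiveSettings = LiveSettings {
    control_input_volume: AtomicBool::new(true),
};

fn main() {
    // The settings observer flips the flag...
    LIVE_SETTINGS.control_input_volume.store(false, Ordering::Relaxed);

    // ...and the audio thread polls it on its own schedule.
    let audio_thread = thread::spawn(|| {
        for _ in 0..3 {
            let enabled = LIVE_SETTINGS.control_input_volume.load(Ordering::Relaxed);
            println!("input volume control enabled: {enabled}");
            thread::sleep(Duration::from_millis(100));
        }
    });
    audio_thread.join().unwrap();
}
```
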
crates/audio/src/rodio_ext.rs 🔗

@@ -9,7 +9,8 @@ use std::{
 use crossbeam::queue::ArrayQueue;
 use rodio::{ChannelCount, Sample, SampleRate, Source};
 
-#[derive(Debug)]
+#[derive(Debug, thiserror::Error)]
+#[error("Replay duration is too short must be >= 100ms")]
 pub struct ReplayDurationTooShort;
 
 pub trait RodioExt: Source + Sized {
@@ -338,6 +339,7 @@ impl<S: Source> Iterator for Replayable<S> {
     fn next(&mut self) -> Option<Self::Item> {
         if let Some(sample) = self.inner.next() {
             self.buffer.push(sample);
+            // If the buffer is full send it
             if self.buffer.len() == self.chunk_size {
                 self.tx.push_normal(std::mem::take(&mut self.buffer));
             }
@@ -422,6 +424,9 @@ impl Iterator for Replay {
                 return None;
             }
 
+            // The queue does not support blocking on a next item. We want this queue as it
+            // is quite fast and provides a fixed size. We know how many samples are in a
+            // buffer so if we do not get one now we must be getting one after `sleep_duration`.
             std::thread::sleep(self.sleep_duration);
         }
     }

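The comment added to `Replay::next` explains why the reader sleeps instead of blocking. A rough, self-contained sketch of that polling pattern (chunk size and timings here are invented; the real code derives the sleep duration from the chunk's sample count):

```rust
use crossbeam::queue::ArrayQueue;
use std::{sync::Arc, thread, time::Duration};

fn main() {
    // Bounded, lock-free queue of sample chunks; ArrayQueue has no blocking pop.
    let queue: Arc<ArrayQueue<Vec<i16>>> = Arc::new(ArrayQueue::new(8));
    let producer = Arc::clone(&queue);

    // Producer side: push fixed-size chunks, dropping one if the queue is full.
    thread::spawn(move || {
        for i in 0..4i16 {
            let _ = producer.push(vec![i; 480]);
            thread::sleep(Duration::from_millis(10));
        }
    });

    // Consumer side: chunks arrive at a known rate, so sleeping for roughly
    // one chunk's duration and retrying is enough when the queue is empty.
    let mut received = 0;
    while received < 4 {
        match queue.pop() {
            Some(chunk) => {
                received += 1;
                println!("got chunk of {} samples", chunk.len());
            }
            None => thread::sleep(Duration::from_millis(10)),
        }
    }
}
```
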
crates/livekit_client/src/livekit_client/playback.rs 🔗

@@ -186,6 +186,8 @@ impl AudioStack {
         let capture_task = if rodio_pipeline {
             info!("Using experimental.rodio_audio audio pipeline");
             let voip_parts = audio::VoipParts::new(cx)?;
+            // Audio needs to run real-time and should never be paused. That is why we are using a
+            // normal std::thread and not a background task
             thread::spawn(move || {
                 // microphone is non send on mac
                 let microphone = audio::Audio::open_microphone(voip_parts)?;

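A small illustrative sketch of that threading choice (`capture_one_buffer` is a made-up stand-in for the real rodio microphone pipeline):

```rust
use std::{thread, time::Duration};

// Stand-in for pulling one buffer of samples from the microphone source.
fn capture_one_buffer() -> anyhow::Result<Vec<i16>> {
    Ok(vec![0i16; 480])
}

fn main() {
    // A plain OS thread: the async executor can never pause or reschedule it,
    // which matters because audio capture is real-time.
    let handle = thread::spawn(move || -> anyhow::Result<()> {
        for _ in 0..3 {
            let buffer = capture_one_buffer()?;
            // In the real code the buffer is handed on to the livekit capture task here.
            println!("captured {} samples", buffer.len());
            thread::sleep(Duration::from_millis(10));
        }
        Ok(())
    });

    if let Err(err) = handle.join().expect("capture thread panicked") {
        eprintln!("capture failed: {err}");
    }
}
```
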
crates/livekit_client/src/livekit_client/playback/source.rs 🔗

@@ -12,9 +12,7 @@ fn frame_to_samplesbuffer(frame: AudioFrame) -> SamplesBuffer {
     let samples = SampleTypeConverter::<_, _>::new(samples);
     let samples: Vec<f32> = samples.collect();
     SamplesBuffer::new(
-        // here be dragons
-        // NonZero::new(frame.num_channels as u16).expect("audio frame channels is nonzero"),
-        nz!(2),
+        nz!(2), // frame always has two channels
         NonZero::new(frame.sample_rate).expect("audio frame sample rate is nonzero"),
         samples,
     )