diff --git a/crates/audio/src/audio.rs b/crates/audio/src/audio.rs
index 35d8b318e52568bada029c645146e128f9895965..3e9b32c9fba6a4b3deb954f76634fb0d8f2580b9 100644
--- a/crates/audio/src/audio.rs
+++ b/crates/audio/src/audio.rs
@@ -31,18 +31,20 @@ pub use rodio_ext::RodioExt;
 
 use crate::audio_settings::LIVE_SETTINGS;
 
-// NOTE: We used to use WebRTC's mixer which only supported
-// 16kHz, 32kHz and 48kHz. As 48 is the most common "next step up"
-// for audio output devices like speakers/bluetooth, we just hard-code
-// this; and downsample when we need to.
+// We are migrating to a 16kHz sample rate from 48kHz. In the future,
+// once we are reasonably sure most users have upgraded, we will
+// remove the LEGACY parameters.
 //
-// Since most noise cancelling requires 16kHz we will move to
-// that in the future.
-pub const SAMPLE_RATE: NonZero<u32> = nz!(48000);
-pub const CHANNEL_COUNT: NonZero<u16> = nz!(2);
+// We migrate to 16kHz because it is sufficient for speech and required
+// by the denoiser and future speech-to-text layers.
+pub const SAMPLE_RATE: NonZero<u32> = nz!(16000);
+pub const CHANNEL_COUNT: NonZero<u16> = nz!(1);
 pub const BUFFER_SIZE: usize =
     // echo canceller and livekit want 10ms of audio
     (SAMPLE_RATE.get() as usize / 100) * CHANNEL_COUNT.get() as usize;
 
+pub const LEGACY_SAMPLE_RATE: NonZero<u32> = nz!(48000);
+pub const LEGACY_CHANNEL_COUNT: NonZero<u16> = nz!(2);
+
 pub const REPLAY_DURATION: Duration = Duration::from_secs(30);
 
 pub fn init(cx: &mut App) {
@@ -160,8 +162,13 @@ impl Audio {
         let stream = rodio::microphone::MicrophoneBuilder::new()
             .default_device()?
             .default_config()?
-            .prefer_sample_rates([SAMPLE_RATE, SAMPLE_RATE.saturating_mul(nz!(2))])
-            .prefer_channel_counts([nz!(1), nz!(2)])
+            .prefer_sample_rates([
+                SAMPLE_RATE, // sample rates trivially resamplable to `SAMPLE_RATE`
+                SAMPLE_RATE.saturating_mul(nz!(2)),
+                SAMPLE_RATE.saturating_mul(nz!(3)),
+                SAMPLE_RATE.saturating_mul(nz!(4)),
+            ])
+            .prefer_channel_counts([CHANNEL_COUNT, CHANNEL_COUNT.saturating_mul(nz!(2))])
             .prefer_buffer_sizes(512..)
             .open_stream()?;
         info!("Opened microphone: {:?}", stream.config());
@@ -189,15 +196,24 @@ impl Audio {
                 }
             }
         })
+        .denoise()
+        .context("Could not set up denoiser")?
+        .periodic_access(Duration::from_millis(100), move |denoise| {
+            denoise.set_enabled(LIVE_SETTINGS.denoise.load(Ordering::Relaxed));
+        })
        .automatic_gain_control(1.0, 4.0, 0.0, 5.0)
        .periodic_access(Duration::from_millis(100), move |agc_source| {
-            agc_source.set_enabled(LIVE_SETTINGS.control_input_volume.load(Ordering::Relaxed));
+            agc_source
+                .set_enabled(LIVE_SETTINGS.auto_microphone_volume.load(Ordering::Relaxed));
        })
        .replayable(REPLAY_DURATION)?;
 
     voip_parts
         .replays
         .add_voip_stream("local microphone".to_string(), replay);
+
+    let stream = stream.constant_params(LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE);
+
     Ok(stream)
 }
@@ -210,7 +226,7 @@ impl Audio {
         let (replay_source, source) = source
             .automatic_gain_control(1.0, 4.0, 0.0, 5.0)
             .periodic_access(Duration::from_millis(100), move |agc_source| {
-                agc_source.set_enabled(LIVE_SETTINGS.control_input_volume.load(Ordering::Relaxed));
+                agc_source.set_enabled(LIVE_SETTINGS.auto_speaker_volume.load(Ordering::Relaxed));
             })
             .replayable(REPLAY_DURATION)
             .expect("REPLAY_DURATION is longer than 100ms");
diff --git a/crates/audio/src/audio_settings.rs b/crates/audio/src/audio_settings.rs
index ea0ea5f3558e015f5579cca43eeb8c529273cb52..110d4b8cf6a49082b369166545c547e0f6585b11 100644
--- a/crates/audio/src/audio_settings.rs
+++ b/crates/audio/src/audio_settings.rs
@@ -9,44 +9,107 @@ use settings::{Settings, SettingsKey, SettingsSources, SettingsStore, SettingsUi};
 #[derive(Clone, Default, Serialize, Deserialize, JsonSchema, Debug, SettingsUi)]
 pub struct AudioSettings {
     /// Opt into the new audio system.
+    ///
+    /// You need to rejoin a call for this setting to apply.
     #[serde(rename = "experimental.rodio_audio", default)]
     pub rodio_audio: bool, // default is false
     /// Requires 'rodio_audio: true'
     ///
-    /// Use the new audio systems automatic gain control for your microphone.
-    /// This affects how loud you sound to others.
-    #[serde(rename = "experimental.control_input_volume", default)]
-    pub control_input_volume: bool,
+    /// Automatically increase or decrease your microphone's volume. This affects how
+    /// loud you sound to others.
+    ///
+    /// Recommended: off (default)
+    /// Microphones are too quiet in Zed; until everyone is on experimental
+    /// audio and has auto speaker volume on, this will make you very loud
+    /// compared to other speakers.
+    #[serde(
+        rename = "experimental.auto_microphone_volume",
+        default = "default_false"
+    )]
+    pub auto_microphone_volume: bool,
+    /// Requires 'rodio_audio: true'
+    ///
+    /// Automatically increase or decrease the volume of other call members.
+    /// This only affects how things sound for you.
+    #[serde(rename = "experimental.auto_speaker_volume", default = "default_true")]
+    pub auto_speaker_volume: bool,
+    /// Requires 'rodio_audio: true'
+    ///
+    /// Remove background noises. Works great for typing, cars, dogs, AC. Does
+    /// not work well on music.
+    #[serde(rename = "experimental.denoise", default = "default_false")]
+    pub denoise: bool,
     /// Requires 'rodio_audio: true'
     ///
-    /// Use the new audio systems automatic gain control on everyone in the
-    /// call. This makes call members who are too quite louder and those who are
-    /// too loud quieter. This only affects how things sound for you.
-    #[serde(rename = "experimental.control_output_volume", default)]
-    pub control_output_volume: bool,
+    /// Use audio parameters compatible with previous versions of
+    /// experimental audio and with non-experimental audio. When this is false
+    /// you will sound strange to anyone not on the latest experimental audio.
+    /// In the future we will migrate by setting this to false.
+    ///
+    /// You need to rejoin a call for this setting to apply.
+    #[serde(
+        rename = "experimental.legacy_audio_compatible",
+        default = "default_true"
+    )]
+    pub legacy_audio_compatible: bool,
 }
-
 /// Configuration of audio in Zed.
 #[derive(Clone, Default, Serialize, Deserialize, JsonSchema, Debug, SettingsUi, SettingsKey)]
 #[serde(default)]
 #[settings_key(key = "audio")]
 pub struct AudioSettingsContent {
     /// Opt into the new audio system.
+    ///
+    /// You need to rejoin a call for this setting to apply.
     #[serde(rename = "experimental.rodio_audio", default)]
     pub rodio_audio: bool, // default is false
     /// Requires 'rodio_audio: true'
     ///
-    /// Use the new audio systems automatic gain control for your microphone.
-    /// This affects how loud you sound to others.
-    #[serde(rename = "experimental.control_input_volume", default)]
-    pub control_input_volume: bool,
+    /// Automatically increase or decrease your microphone's volume. This affects how
+    /// loud you sound to others.
+    ///
+    /// Recommended: off (default)
+    /// Microphones are too quiet in Zed; until everyone is on experimental
+    /// audio and has auto speaker volume on, this will make you very loud
+    /// compared to other speakers.
+    #[serde(
+        rename = "experimental.auto_microphone_volume",
+        default = "default_false"
+    )]
+    pub auto_microphone_volume: bool,
+    /// Requires 'rodio_audio: true'
+    ///
+    /// Automatically increase or decrease the volume of other call members.
+    /// This only affects how things sound for you.
+    #[serde(rename = "experimental.auto_speaker_volume", default = "default_true")]
+    pub auto_speaker_volume: bool,
     /// Requires 'rodio_audio: true'
     ///
-    /// Use the new audio systems automatic gain control on everyone in the
-    /// call. This makes call members who are too quite louder and those who are
-    /// too loud quieter. This only affects how things sound for you.
-    #[serde(rename = "experimental.control_output_volume", default)]
-    pub control_output_volume: bool,
+    /// Remove background noises. Works great for typing, cars, dogs, AC. Does
+    /// not work well on music.
+    #[serde(rename = "experimental.denoise", default = "default_false")]
+    pub denoise: bool,
+    /// Requires 'rodio_audio: true'
+    ///
+    /// Use audio parameters compatible with previous versions of
+    /// experimental audio and with non-experimental audio. When this is false
+    /// you will sound strange to anyone not on the latest experimental audio.
+    /// In the future we will migrate by setting this to false.
+    ///
+    /// You need to rejoin a call for this setting to apply.
+    #[serde(
+        rename = "experimental.legacy_audio_compatible",
+        default = "default_true"
+    )]
+    pub legacy_audio_compatible: bool,
+}
+
+fn default_true() -> bool {
+    true
+}
+
+fn default_false() -> bool {
+    false
 }
 
 impl Settings for AudioSettings {
@@ -61,31 +124,38 @@
 
 /// See docs on [LIVE_SETTINGS]
 pub(crate) struct LiveSettings {
-    pub(crate) control_input_volume: AtomicBool,
-    pub(crate) control_output_volume: AtomicBool,
+    pub(crate) auto_microphone_volume: AtomicBool,
+    pub(crate) auto_speaker_volume: AtomicBool,
+    pub(crate) denoise: AtomicBool,
 }
 
 impl LiveSettings {
     pub(crate) fn initialize(&self, cx: &mut App) {
         cx.observe_global::<SettingsStore>(move |cx| {
-            LIVE_SETTINGS.control_input_volume.store(
-                AudioSettings::get_global(cx).control_input_volume,
+            LIVE_SETTINGS.auto_microphone_volume.store(
+                AudioSettings::get_global(cx).auto_microphone_volume,
                 Ordering::Relaxed,
             );
-            LIVE_SETTINGS.control_output_volume.store(
-                AudioSettings::get_global(cx).control_output_volume,
+            LIVE_SETTINGS.auto_speaker_volume.store(
+                AudioSettings::get_global(cx).auto_speaker_volume,
                 Ordering::Relaxed,
             );
+            LIVE_SETTINGS
+                .denoise
+                .store(AudioSettings::get_global(cx).denoise, Ordering::Relaxed);
         })
         .detach();
 
         let init_settings = AudioSettings::get_global(cx);
         LIVE_SETTINGS
-            .control_input_volume
-            .store(init_settings.control_input_volume, Ordering::Relaxed);
+            .auto_microphone_volume
+            .store(init_settings.auto_microphone_volume, Ordering::Relaxed);
+        LIVE_SETTINGS
+            .auto_speaker_volume
+            .store(init_settings.auto_speaker_volume, Ordering::Relaxed);
         LIVE_SETTINGS
-            .control_output_volume
-            .store(init_settings.control_output_volume, Ordering::Relaxed);
+            .denoise
+            .store(init_settings.denoise, Ordering::Relaxed);
     }
 }
@@ -94,6 +164,7 @@ impl LiveSettings {
 /// real time and must each run in a dedicated OS thread, therefore we can not
 /// use the background executor.
 pub(crate) static LIVE_SETTINGS: LiveSettings = LiveSettings {
-    control_input_volume: AtomicBool::new(true),
-    control_output_volume: AtomicBool::new(true),
+    auto_microphone_volume: AtomicBool::new(true),
+    auto_speaker_volume: AtomicBool::new(true),
+    denoise: AtomicBool::new(true),
 };
diff --git a/crates/livekit_client/src/livekit_client.rs b/crates/livekit_client/src/livekit_client.rs
index 45e929cb2ec0bebf054497632d614af1975f6397..04e669869ddbf64ffd92cbcad4bf927bfec55cb5 100644
--- a/crates/livekit_client/src/livekit_client.rs
+++ b/crates/livekit_client/src/livekit_client.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use anyhow::{Context as _, Result};
+use anyhow::{Context as _, Result, anyhow};
 use audio::AudioSettings;
 use collections::HashMap;
 use futures::{SinkExt, channel::mpsc};
@@ -12,7 +12,10 @@ use settings::Settings;
 
 mod playback;
 
-use crate::{LocalTrack, Participant, RemoteTrack, RoomEvent, TrackPublication};
+use crate::{
+    LocalTrack, Participant, RemoteTrack, RoomEvent, TrackPublication,
+    livekit_client::playback::Speaker,
+};
 pub use playback::AudioStream;
 pub(crate) use playback::{RemoteVideoFrame, play_remote_video_track};
 
@@ -132,11 +135,20 @@ impl Room {
         track: &RemoteAudioTrack,
         cx: &mut App,
     ) -> Result<AudioStream> {
+        let speaker: Speaker =
+            serde_urlencoded::from_str(&track.0.name()).unwrap_or_else(|_| Speaker {
+                name: track.0.name(),
+                is_staff: false,
+                legacy_audio_compatible: true,
+            });
+
         if AudioSettings::get_global(cx).rodio_audio {
             info!("Using experimental.rodio_audio audio pipeline for output");
-            playback::play_remote_audio_track(&track.0, cx)
-        } else {
+            playback::play_remote_audio_track(&track.0, speaker, cx)
+        } else if speaker.legacy_audio_compatible {
             Ok(self.playback.play_remote_audio_track(&track.0))
+        } else {
+            Err(anyhow!("Client version too old to play audio in call"))
         }
     }
 }
diff --git a/crates/livekit_client/src/livekit_client/playback.rs b/crates/livekit_client/src/livekit_client/playback.rs
index df8b5ea54fb1ce11bf871faa912757bbff1fd7f9..5683ee3f98c5e23488c876789170b114b71d7053 100644
--- a/crates/livekit_client/src/livekit_client/playback.rs
+++ b/crates/livekit_client/src/livekit_client/playback.rs
@@ -1,6 +1,6 @@
 use anyhow::{Context as _, Result};
-use audio::{AudioSettings, CHANNEL_COUNT, SAMPLE_RATE};
+use audio::{AudioSettings, CHANNEL_COUNT, LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE, SAMPLE_RATE};
 use cpal::traits::{DeviceTrait, StreamTrait as _};
 use futures::channel::mpsc::UnboundedSender;
 use futures::{Stream, StreamExt as _};
@@ -43,12 +43,17 @@ pub(crate) struct AudioStack {
 
 pub(crate) fn play_remote_audio_track(
     track: &livekit::track::RemoteAudioTrack,
+    speaker: Speaker,
     cx: &mut gpui::App,
 ) -> Result<AudioStream> {
+    let stream = source::LiveKitStream::new(
+        cx.background_executor(),
+        track,
+        speaker.legacy_audio_compatible,
+    );
+
     let stop_handle = Arc::new(AtomicBool::new(false));
     let stop_handle_clone = stop_handle.clone();
-    let stream = source::LiveKitStream::new(cx.background_executor(), track);
-
     let stream = stream
         .stoppable()
         .periodic_access(Duration::from_millis(50), move |s| {
@@ -57,10 +62,6 @@ pub(crate) fn play_remote_audio_track(
             }
         });
 
-    let speaker: Speaker = serde_urlencoded::from_str(&track.name()).unwrap_or_else(|_| Speaker {
-        name: track.name(),
-        is_staff: false,
-    });
     audio::Audio::play_voip_stream(stream, speaker.name, speaker.is_staff, cx)
         .context("Could not play audio")?;
 
@@ -152,17 +153,32 @@ impl AudioStack {
         is_staff: bool,
         cx: &AsyncApp,
     ) -> Result<(crate::LocalAudioTrack, AudioStream)> {
-        let source = NativeAudioSource::new(
-            // n.b. this struct's options are always ignored, noise cancellation is provided by apm.
-            AudioSourceOptions::default(),
-            SAMPLE_RATE.get(),
-            CHANNEL_COUNT.get().into(),
-            10,
-        );
+        let legacy_audio_compatible =
+            AudioSettings::try_read_global(cx, |setting| setting.legacy_audio_compatible)
+                .unwrap_or_default();
+
+        let source = if legacy_audio_compatible {
+            NativeAudioSource::new(
+                // n.b. this struct's options are always ignored, noise cancellation is provided by apm.
+                AudioSourceOptions::default(),
+                LEGACY_SAMPLE_RATE.get(),
+                LEGACY_CHANNEL_COUNT.get().into(),
+                10,
+            )
+        } else {
+            NativeAudioSource::new(
+                // n.b. this struct's options are always ignored, noise cancellation is provided by apm.
+                AudioSourceOptions::default(),
+                SAMPLE_RATE.get(),
+                CHANNEL_COUNT.get().into(),
+                10,
+            )
+        };
 
         let track_name = serde_urlencoded::to_string(Speaker {
             name: user_name,
             is_staff,
+            legacy_audio_compatible,
         })
         .context("Could not encode user information in track name")?;
@@ -186,6 +202,7 @@ impl AudioStack {
         let capture_task = if rodio_pipeline {
             info!("Using experimental.rodio_audio audio pipeline");
             let voip_parts = audio::VoipParts::new(cx)?;
-            // Audio needs to run real-time and should never be paused. That is why we are using a
-            // normal std::thread and not a background task
-            thread::Builder::new()
-                .spawn(move || {
-                    let microphone = audio::Audio::open_microphone(voip_parts)?;
-                    send_to_livekit(frame_tx, microphone);
-                    Ok::<(), anyhow::Error>(())
-                })
-                .unwrap();
+            // Audio needs to run real-time and should never be paused. That is
+            // why we are using a normal std::thread and not a background task
+            thread::spawn(move || {
+                // microphone is non send on mac
+                let microphone = match audio::Audio::open_microphone(voip_parts) {
+                    Ok(m) => m,
+                    Err(e) => {
+                        log::error!("Could not open microphone: {e}");
+                        return;
+                    }
+                };
+                send_to_livekit(frame_tx, microphone);
+            });
             Task::ready(Ok(()))
         } else {
             self.executor.spawn(async move {
-                Self::capture_input(apm, frame_tx, SAMPLE_RATE.get(), CHANNEL_COUNT.get().into())
-                    .await
+                Self::capture_input(
+                    apm,
+                    frame_tx,
+                    LEGACY_SAMPLE_RATE.get(),
+                    LEGACY_CHANNEL_COUNT.get().into(),
+                )
+                .await
             })
         };
@@ -389,25 +427,30 @@ impl AudioStack {
 }
 
 #[derive(Serialize, Deserialize)]
-struct Speaker {
-    name: String,
-    is_staff: bool,
+pub struct Speaker {
+    pub name: String,
+    pub is_staff: bool,
+    pub legacy_audio_compatible: bool,
 }
 
 fn send_to_livekit(frame_tx: UnboundedSender<AudioFrame<'static>>, mut microphone: impl Source) {
     use cpal::Sample;
 
+    let sample_rate = microphone.sample_rate().get();
+    let num_channels = microphone.channels().get() as u32;
+    let buffer_size = sample_rate / 100 * num_channels;
+
     loop {
         let sampled: Vec<_> = microphone
            .by_ref()
-            .take(audio::BUFFER_SIZE)
+            .take(buffer_size as usize)
            .map(|s| s.to_sample())
            .collect();
 
         if frame_tx
             .unbounded_send(AudioFrame {
-                sample_rate: SAMPLE_RATE.get(),
-                num_channels: CHANNEL_COUNT.get() as u32,
-                samples_per_channel: sampled.len() as u32 / CHANNEL_COUNT.get() as u32,
+                sample_rate,
+                num_channels,
+                samples_per_channel: sampled.len() as u32 / num_channels,
                 data: Cow::Owned(sampled),
             })
             .is_err()
diff --git a/crates/livekit_client/src/livekit_client/playback/source.rs b/crates/livekit_client/src/livekit_client/playback/source.rs
index f605b3d517cd816491f0eceadce5ac778ef75d21..cde4b19fda2e053346ad535e7c75b2abda60431a 100644
--- a/crates/livekit_client/src/livekit_client/playback/source.rs
+++ b/crates/livekit_client/src/livekit_client/playback/source.rs
@@ -3,17 +3,19 @@ use std::num::NonZero;
 use futures::StreamExt;
 use libwebrtc::{audio_stream::native::NativeAudioStream, prelude::AudioFrame};
 use livekit::track::RemoteAudioTrack;
-use rodio::{Source, buffer::SamplesBuffer, conversions::SampleTypeConverter, nz};
+use rodio::{
+    ChannelCount, SampleRate, Source, buffer::SamplesBuffer, conversions::SampleTypeConverter,
+};
 
-use audio::{CHANNEL_COUNT, SAMPLE_RATE};
+use audio::{CHANNEL_COUNT, LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE, SAMPLE_RATE};
 
 fn frame_to_samplesbuffer(frame: AudioFrame) -> SamplesBuffer {
     let samples = frame.data.iter().copied();
     let samples = SampleTypeConverter::<_, _>::new(samples);
     let samples: Vec<f32> = samples.collect();
     SamplesBuffer::new(
-        nz!(2), // frame always has two channels
-        NonZero::new(frame.sample_rate).expect("audio frame sample rate is nonzero"),
+        NonZero::new(frame.num_channels as u16).expect("zero channels is nonsense"),
+        NonZero::new(frame.sample_rate).expect("samplerate zero is nonsense"),
         samples,
     )
 }
@@ -22,14 +24,26 @@ pub struct LiveKitStream {
     // shared_buffer: SharedBuffer,
     inner: rodio::queue::SourcesQueueOutput,
     _receiver_task: gpui::Task<()>,
+    channel_count: ChannelCount,
+    sample_rate: SampleRate,
 }
 
 impl LiveKitStream {
-    pub fn new(executor: &gpui::BackgroundExecutor, track: &RemoteAudioTrack) -> Self {
+    pub fn new(
+        executor: &gpui::BackgroundExecutor,
+        track: &RemoteAudioTrack,
+        legacy: bool,
+    ) -> Self {
+        let (channel_count, sample_rate) = if legacy {
+            (LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE)
+        } else {
+            (CHANNEL_COUNT, SAMPLE_RATE)
+        };
+
         let mut stream = NativeAudioStream::new(
             track.rtc_track(),
-            SAMPLE_RATE.get() as i32,
-            CHANNEL_COUNT.get().into(),
+            sample_rate.get() as i32,
+            channel_count.get().into(),
         );
         let (queue_input, queue_output) = rodio::queue::queue(true);
         // spawn rtc stream
@@ -45,6 +59,8 @@ impl LiveKitStream {
         LiveKitStream {
             _receiver_task: receiver_task,
             inner: queue_output,
+            sample_rate,
+            channel_count,
         }
     }
 }
@@ -63,17 +79,11 @@ impl Source for LiveKitStream {
     }
 
     fn channels(&self) -> rodio::ChannelCount {
-        // This must be hardcoded because the playback source assumes constant
-        // sample rate and channel count. The queue upon which this is build
-        // will however report different counts and rates. Even though we put in
-        // only items with our (constant) CHANNEL_COUNT & SAMPLE_RATE this will
-        // play silence on one channel and at 44100 which is not what our
-        // constants are.
-        CHANNEL_COUNT
+        self.channel_count
     }
 
     fn sample_rate(&self) -> rodio::SampleRate {
-        SAMPLE_RATE // see comment on channels
+        self.sample_rate
     }
 
     fn total_duration(&self) -> Option<Duration> {
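
For reviewers, here is a standalone sketch (not part of the diff) of how the `Speaker` metadata above round-trips through the LiveKit track name. It assumes only the `serde` and `serde_urlencoded` crates; the fallback mirrors the `unwrap_or_else` in `Room::play_remote_audio_track`, which treats any track whose name is not URL-encoded metadata (i.e. one published by an older client) as legacy-compatible.

```rust
// Sketch only: shows the encode/decode behaviour this PR relies on for
// cross-version compatibility. Crate versions and exact output are assumptions.
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize)]
struct Speaker {
    name: String,
    is_staff: bool,
    legacy_audio_compatible: bool,
}

fn main() -> Result<(), serde_urlencoded::ser::Error> {
    // Publishing side: the whole struct becomes the track name.
    let track_name = serde_urlencoded::to_string(Speaker {
        name: "alice".to_string(),
        is_staff: false,
        legacy_audio_compatible: true,
    })?;
    // e.g. name=alice&is_staff=false&legacy_audio_compatible=true
    println!("{track_name}");

    // Receiving side: an old-style track name fails to parse, so the client
    // falls back to a legacy-compatible Speaker, as the diff does.
    let old_name = "microphone";
    let speaker: Speaker = serde_urlencoded::from_str(old_name).unwrap_or(Speaker {
        name: old_name.to_string(),
        is_staff: false,
        legacy_audio_compatible: true,
    });
    println!("{speaker:?}");
    Ok(())
}
```

When the decoded metadata reports `legacy_audio_compatible: false` and the listener has not enabled `experimental.rodio_audio`, the diff refuses to play the track rather than decode it with a mismatched channel count and sample rate.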