Detailed changes
@@ -31,18 +31,20 @@ pub use rodio_ext::RodioExt;
use crate::audio_settings::LIVE_SETTINGS;
-// NOTE: We used to use WebRTC's mixer which only supported
-// 16kHz, 32kHz and 48kHz. As 48 is the most common "next step up"
-// for audio output devices like speakers/bluetooth, we just hard-code
-// this; and downsample when we need to.
+// We are migrating to 16kHz sample rate from 48kHz. In the future
+// once we are reasonably sure most users have upgraded we will
+// remove the LEGACY parameters.
//
-// Since most noise cancelling requires 16kHz we will move to
-// that in the future.
-pub const SAMPLE_RATE: NonZero<u32> = nz!(48000);
-pub const CHANNEL_COUNT: NonZero<u16> = nz!(2);
+// We migrate to 16kHz because it is sufficient for speech and required
+// by the denoiser and future Speech to Text layers.
+pub const SAMPLE_RATE: NonZero<u32> = nz!(16000);
+pub const CHANNEL_COUNT: NonZero<u16> = nz!(1);
pub const BUFFER_SIZE: usize = // echo canceller and livekit want 10ms of audio
(SAMPLE_RATE.get() as usize / 100) * CHANNEL_COUNT.get() as usize;
+pub const LEGACY_SAMPLE_RATE: NonZero<u32> = nz!(48000);
+pub const LEGACY_CHANNEL_COUNT: NonZero<u16> = nz!(2);
+
pub const REPLAY_DURATION: Duration = Duration::from_secs(30);
pub fn init(cx: &mut App) {
@@ -160,8 +162,13 @@ impl Audio {
let stream = rodio::microphone::MicrophoneBuilder::new()
.default_device()?
.default_config()?
- .prefer_sample_rates([SAMPLE_RATE, SAMPLE_RATE.saturating_mul(nz!(2))])
- .prefer_channel_counts([nz!(1), nz!(2)])
+ .prefer_sample_rates([
+ SAMPLE_RATE, // sample rates trivially resamplable to `SAMPLE_RATE`
+ SAMPLE_RATE.saturating_mul(nz!(2)),
+ SAMPLE_RATE.saturating_mul(nz!(3)),
+ SAMPLE_RATE.saturating_mul(nz!(4)),
+ ])
+ .prefer_channel_counts([CHANNEL_COUNT, CHANNEL_COUNT.saturating_mul(nz!(2))])
.prefer_buffer_sizes(512..)
.open_stream()?;
info!("Opened microphone: {:?}", stream.config());
@@ -189,15 +196,24 @@ impl Audio {
}
}
})
+ .denoise()
+ .context("Could not set up denoiser")?
+ .periodic_access(Duration::from_millis(100), move |denoise| {
+ denoise.set_enabled(LIVE_SETTINGS.denoise.load(Ordering::Relaxed));
+ })
.automatic_gain_control(1.0, 4.0, 0.0, 5.0)
.periodic_access(Duration::from_millis(100), move |agc_source| {
- agc_source.set_enabled(LIVE_SETTINGS.control_input_volume.load(Ordering::Relaxed));
+ agc_source
+ .set_enabled(LIVE_SETTINGS.auto_microphone_volume.load(Ordering::Relaxed));
})
.replayable(REPLAY_DURATION)?;
voip_parts
.replays
.add_voip_stream("local microphone".to_string(), replay);
+
+ let stream = stream.constant_params(LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE);
+
Ok(stream)
}
@@ -210,7 +226,7 @@ impl Audio {
let (replay_source, source) = source
.automatic_gain_control(1.0, 4.0, 0.0, 5.0)
.periodic_access(Duration::from_millis(100), move |agc_source| {
- agc_source.set_enabled(LIVE_SETTINGS.control_input_volume.load(Ordering::Relaxed));
+ agc_source.set_enabled(LIVE_SETTINGS.auto_speaker_volume.load(Ordering::Relaxed));
})
.replayable(REPLAY_DURATION)
.expect("REPLAY_DURATION is longer than 100ms");
@@ -9,44 +9,107 @@ use settings::{Settings, SettingsKey, SettingsSources, SettingsStore, SettingsUi
#[derive(Clone, Default, Serialize, Deserialize, JsonSchema, Debug, SettingsUi)]
pub struct AudioSettings {
/// Opt into the new audio system.
+ ///
+ /// You need to rejoin a call for this setting to apply
#[serde(rename = "experimental.rodio_audio", default)]
pub rodio_audio: bool, // default is false
/// Requires 'rodio_audio: true'
///
- /// Use the new audio systems automatic gain control for your microphone.
- /// This affects how loud you sound to others.
- #[serde(rename = "experimental.control_input_volume", default)]
- pub control_input_volume: bool,
+ /// Automatically increase or decrease you microphone's volume. This affects how
+ /// loud you sound to others.
+ ///
+ /// Recommended: off (default)
+ /// Microphones are too quite in zed, until everyone is on experimental
+ /// audio and has auto speaker volume on this will make you very loud
+ /// compared to other speakers.
+ #[serde(
+ rename = "experimental.auto_microphone_volume",
+ default = "default_false"
+ )]
+ pub auto_microphone_volume: bool,
+ /// Requires 'rodio_audio: true'
+ ///
+ /// Automatically increate or decrease the volume of other call members.
+ /// This only affects how things sound for you.
+ #[serde(rename = "experimental.auto_speaker_volume", default = "default_true")]
+ pub auto_speaker_volume: bool,
+ /// Requires 'rodio_audio: true'
+ ///
+ /// Remove background noises. Works great for typing, cars, dogs, AC. Does
+ /// not work well on music.
+ #[serde(rename = "experimental.denoise", default = "default_false")]
+ pub denoise: bool,
/// Requires 'rodio_audio: true'
///
- /// Use the new audio systems automatic gain control on everyone in the
- /// call. This makes call members who are too quite louder and those who are
- /// too loud quieter. This only affects how things sound for you.
- #[serde(rename = "experimental.control_output_volume", default)]
- pub control_output_volume: bool,
+ /// Use audio parameters compatible with the previous versions of
+ /// experimental audio and non-experimental audio. When this is false you
+ /// will sound strange to anyone not on the latest experimental audio. In
+ /// the future we will migrate by setting this to false
+ ///
+ /// You need to rejoin a call for this setting to apply
+ #[serde(
+ rename = "experimental.legacy_audio_compatible",
+ default = "default_true"
+ )]
+ pub legacy_audio_compatible: bool,
}
-
/// Configuration of audio in Zed.
#[derive(Clone, Default, Serialize, Deserialize, JsonSchema, Debug, SettingsUi, SettingsKey)]
#[serde(default)]
#[settings_key(key = "audio")]
pub struct AudioSettingsContent {
/// Opt into the new audio system.
+ ///
+ /// You need to rejoin a call for this setting to apply
#[serde(rename = "experimental.rodio_audio", default)]
pub rodio_audio: bool, // default is false
/// Requires 'rodio_audio: true'
///
- /// Use the new audio systems automatic gain control for your microphone.
- /// This affects how loud you sound to others.
- #[serde(rename = "experimental.control_input_volume", default)]
- pub control_input_volume: bool,
+ /// Automatically increase or decrease you microphone's volume. This affects how
+ /// loud you sound to others.
+ ///
+ /// Recommended: off (default)
+ /// Microphones are too quite in zed, until everyone is on experimental
+ /// audio and has auto speaker volume on this will make you very loud
+ /// compared to other speakers.
+ #[serde(
+ rename = "experimental.auto_microphone_volume",
+ default = "default_false"
+ )]
+ pub auto_microphone_volume: bool,
+ /// Requires 'rodio_audio: true'
+ ///
+ /// Automatically increate or decrease the volume of other call members.
+ /// This only affects how things sound for you.
+ #[serde(rename = "experimental.auto_speaker_volume", default = "default_true")]
+ pub auto_speaker_volume: bool,
/// Requires 'rodio_audio: true'
///
- /// Use the new audio systems automatic gain control on everyone in the
- /// call. This makes call members who are too quite louder and those who are
- /// too loud quieter. This only affects how things sound for you.
- #[serde(rename = "experimental.control_output_volume", default)]
- pub control_output_volume: bool,
+ /// Remove background noises. Works great for typing, cars, dogs, AC. Does
+ /// not work well on music.
+ #[serde(rename = "experimental.denoise", default = "default_false")]
+ pub denoise: bool,
+ /// Requires 'rodio_audio: true'
+ ///
+ /// Use audio parameters compatible with the previous versions of
+ /// experimental audio and non-experimental audio. When this is false you
+ /// will sound strange to anyone not on the latest experimental audio. In
+ /// the future we will migrate by setting this to false
+ ///
+ /// You need to rejoin a call for this setting to apply
+ #[serde(
+ rename = "experimental.legacy_audio_compatible",
+ default = "default_true"
+ )]
+ pub legacy_audio_compatible: bool,
+}
+
+fn default_true() -> bool {
+ true
+}
+
+fn default_false() -> bool {
+ false
}
impl Settings for AudioSettings {
@@ -61,31 +124,38 @@ impl Settings for AudioSettings {
/// See docs on [LIVE_SETTINGS]
pub(crate) struct LiveSettings {
- pub(crate) control_input_volume: AtomicBool,
- pub(crate) control_output_volume: AtomicBool,
+ pub(crate) auto_microphone_volume: AtomicBool,
+ pub(crate) auto_speaker_volume: AtomicBool,
+ pub(crate) denoise: AtomicBool,
}
impl LiveSettings {
pub(crate) fn initialize(&self, cx: &mut App) {
cx.observe_global::<SettingsStore>(move |cx| {
- LIVE_SETTINGS.control_input_volume.store(
- AudioSettings::get_global(cx).control_input_volume,
+ LIVE_SETTINGS.auto_microphone_volume.store(
+ AudioSettings::get_global(cx).auto_microphone_volume,
Ordering::Relaxed,
);
- LIVE_SETTINGS.control_output_volume.store(
- AudioSettings::get_global(cx).control_output_volume,
+ LIVE_SETTINGS.auto_speaker_volume.store(
+ AudioSettings::get_global(cx).auto_speaker_volume,
Ordering::Relaxed,
);
+ LIVE_SETTINGS
+ .denoise
+ .store(AudioSettings::get_global(cx).denoise, Ordering::Relaxed);
})
.detach();
let init_settings = AudioSettings::get_global(cx);
LIVE_SETTINGS
- .control_input_volume
- .store(init_settings.control_input_volume, Ordering::Relaxed);
+ .auto_microphone_volume
+ .store(init_settings.auto_microphone_volume, Ordering::Relaxed);
+ LIVE_SETTINGS
+ .auto_speaker_volume
+ .store(init_settings.auto_speaker_volume, Ordering::Relaxed);
LIVE_SETTINGS
- .control_output_volume
- .store(init_settings.control_output_volume, Ordering::Relaxed);
+ .denoise
+ .store(init_settings.denoise, Ordering::Relaxed);
}
}
@@ -94,6 +164,7 @@ impl LiveSettings {
/// real time and must each run in a dedicated OS thread, therefore we can not
/// use the background executor.
pub(crate) static LIVE_SETTINGS: LiveSettings = LiveSettings {
- control_input_volume: AtomicBool::new(true),
- control_output_volume: AtomicBool::new(true),
+ auto_microphone_volume: AtomicBool::new(true),
+ auto_speaker_volume: AtomicBool::new(true),
+ denoise: AtomicBool::new(true),
};
@@ -1,6 +1,6 @@
use std::sync::Arc;
-use anyhow::{Context as _, Result};
+use anyhow::{Context as _, Result, anyhow};
use audio::AudioSettings;
use collections::HashMap;
use futures::{SinkExt, channel::mpsc};
@@ -12,7 +12,10 @@ use settings::Settings;
mod playback;
-use crate::{LocalTrack, Participant, RemoteTrack, RoomEvent, TrackPublication};
+use crate::{
+ LocalTrack, Participant, RemoteTrack, RoomEvent, TrackPublication,
+ livekit_client::playback::Speaker,
+};
pub use playback::AudioStream;
pub(crate) use playback::{RemoteVideoFrame, play_remote_video_track};
@@ -132,11 +135,20 @@ impl Room {
track: &RemoteAudioTrack,
cx: &mut App,
) -> Result<playback::AudioStream> {
+ let speaker: Speaker =
+ serde_urlencoded::from_str(&track.0.name()).unwrap_or_else(|_| Speaker {
+ name: track.0.name(),
+ is_staff: false,
+ legacy_audio_compatible: true,
+ });
+
if AudioSettings::get_global(cx).rodio_audio {
info!("Using experimental.rodio_audio audio pipeline for output");
- playback::play_remote_audio_track(&track.0, cx)
- } else {
+ playback::play_remote_audio_track(&track.0, speaker, cx)
+ } else if speaker.legacy_audio_compatible {
Ok(self.playback.play_remote_audio_track(&track.0))
+ } else {
+ Err(anyhow!("Client version too old to play audio in call"))
}
}
}
@@ -1,6 +1,6 @@
use anyhow::{Context as _, Result};
-use audio::{AudioSettings, CHANNEL_COUNT, SAMPLE_RATE};
+use audio::{AudioSettings, CHANNEL_COUNT, LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE, SAMPLE_RATE};
use cpal::traits::{DeviceTrait, StreamTrait as _};
use futures::channel::mpsc::UnboundedSender;
use futures::{Stream, StreamExt as _};
@@ -43,12 +43,17 @@ pub(crate) struct AudioStack {
pub(crate) fn play_remote_audio_track(
track: &livekit::track::RemoteAudioTrack,
+ speaker: Speaker,
cx: &mut gpui::App,
) -> Result<AudioStream> {
+ let stream = source::LiveKitStream::new(
+ cx.background_executor(),
+ track,
+ speaker.legacy_audio_compatible,
+ );
+
let stop_handle = Arc::new(AtomicBool::new(false));
let stop_handle_clone = stop_handle.clone();
- let stream = source::LiveKitStream::new(cx.background_executor(), track);
-
let stream = stream
.stoppable()
.periodic_access(Duration::from_millis(50), move |s| {
@@ -57,10 +62,6 @@ pub(crate) fn play_remote_audio_track(
}
});
- let speaker: Speaker = serde_urlencoded::from_str(&track.name()).unwrap_or_else(|_| Speaker {
- name: track.name(),
- is_staff: false,
- });
audio::Audio::play_voip_stream(stream, speaker.name, speaker.is_staff, cx)
.context("Could not play audio")?;
@@ -152,17 +153,32 @@ impl AudioStack {
is_staff: bool,
cx: &AsyncApp,
) -> Result<(crate::LocalAudioTrack, AudioStream)> {
- let source = NativeAudioSource::new(
- // n.b. this struct's options are always ignored, noise cancellation is provided by apm.
- AudioSourceOptions::default(),
- SAMPLE_RATE.get(),
- CHANNEL_COUNT.get().into(),
- 10,
- );
+ let legacy_audio_compatible =
+ AudioSettings::try_read_global(cx, |setting| setting.legacy_audio_compatible)
+ .unwrap_or_default();
+
+ let source = if legacy_audio_compatible {
+ NativeAudioSource::new(
+ // n.b. this struct's options are always ignored, noise cancellation is provided by apm.
+ AudioSourceOptions::default(),
+ LEGACY_SAMPLE_RATE.get(),
+ LEGACY_CHANNEL_COUNT.get().into(),
+ 10,
+ )
+ } else {
+ NativeAudioSource::new(
+ // n.b. this struct's options are always ignored, noise cancellation is provided by apm.
+ AudioSourceOptions::default(),
+ SAMPLE_RATE.get(),
+ CHANNEL_COUNT.get().into(),
+ 10,
+ )
+ };
let track_name = serde_urlencoded::to_string(Speaker {
name: user_name,
is_staff,
+ legacy_audio_compatible,
})
.context("Could not encode user information in track name")?;
@@ -186,6 +202,7 @@ impl AudioStack {
let capture_task = if rodio_pipeline {
info!("Using experimental.rodio_audio audio pipeline");
let voip_parts = audio::VoipParts::new(cx)?;
+<<<<<<< HEAD
// Audio needs to run real-time and should never be paused. That is why we are using a
// normal std::thread and not a background task
thread::Builder::new()
@@ -197,11 +214,32 @@ impl AudioStack {
Ok::<(), anyhow::Error>(())
})
.unwrap();
+=======
+ // Audio needs to run real-time and should never be paused. That is
+ // why we are using a normal std::thread and not a background task
+ thread::spawn(move || {
+ // microphone is non send on mac
+ let microphone = match audio::Audio::open_microphone(voip_parts) {
+ Ok(m) => m,
+ Err(e) => {
+ log::error!("Could not open microphone: {e}");
+ return;
+ }
+ };
+ send_to_livekit(frame_tx, microphone);
+ Ok::<(), anyhow::Error>(())
+ });
+>>>>>>> e459c8f30c (fixes denoise setting, prepares for migration to 16kHz)
Task::ready(Ok(()))
} else {
self.executor.spawn(async move {
- Self::capture_input(apm, frame_tx, SAMPLE_RATE.get(), CHANNEL_COUNT.get().into())
- .await
+ Self::capture_input(
+ apm,
+ frame_tx,
+ LEGACY_SAMPLE_RATE.get(),
+ LEGACY_CHANNEL_COUNT.get().into(),
+ )
+ .await
})
};
@@ -389,25 +427,30 @@ impl AudioStack {
}
#[derive(Serialize, Deserialize)]
-struct Speaker {
- name: String,
- is_staff: bool,
+pub struct Speaker {
+ pub name: String,
+ pub is_staff: bool,
+ pub legacy_audio_compatible: bool,
}
fn send_to_livekit(frame_tx: UnboundedSender<AudioFrame<'static>>, mut microphone: impl Source) {
use cpal::Sample;
+ let sample_rate = microphone.sample_rate().get();
+ let num_channels = microphone.channels().get() as u32;
+ let buffer_size = sample_rate / 100 * num_channels;
+
loop {
let sampled: Vec<_> = microphone
.by_ref()
- .take(audio::BUFFER_SIZE)
+ .take(buffer_size as usize)
.map(|s| s.to_sample())
.collect();
if frame_tx
.unbounded_send(AudioFrame {
- sample_rate: SAMPLE_RATE.get(),
- num_channels: CHANNEL_COUNT.get() as u32,
- samples_per_channel: sampled.len() as u32 / CHANNEL_COUNT.get() as u32,
+ sample_rate,
+ num_channels,
+ samples_per_channel: sampled.len() as u32 / num_channels,
data: Cow::Owned(sampled),
})
.is_err()
@@ -3,17 +3,19 @@ use std::num::NonZero;
use futures::StreamExt;
use libwebrtc::{audio_stream::native::NativeAudioStream, prelude::AudioFrame};
use livekit::track::RemoteAudioTrack;
-use rodio::{Source, buffer::SamplesBuffer, conversions::SampleTypeConverter, nz};
+use rodio::{
+ ChannelCount, SampleRate, Source, buffer::SamplesBuffer, conversions::SampleTypeConverter,
+};
-use audio::{CHANNEL_COUNT, SAMPLE_RATE};
+use audio::{CHANNEL_COUNT, LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE, SAMPLE_RATE};
fn frame_to_samplesbuffer(frame: AudioFrame) -> SamplesBuffer {
let samples = frame.data.iter().copied();
let samples = SampleTypeConverter::<_, _>::new(samples);
let samples: Vec<f32> = samples.collect();
SamplesBuffer::new(
- nz!(2), // frame always has two channels
- NonZero::new(frame.sample_rate).expect("audio frame sample rate is nonzero"),
+ NonZero::new(frame.num_channels as u16).expect("zero channels is nonsense"),
+ NonZero::new(frame.sample_rate).expect("samplerate zero is nonsense"),
samples,
)
}
@@ -22,14 +24,26 @@ pub struct LiveKitStream {
// shared_buffer: SharedBuffer,
inner: rodio::queue::SourcesQueueOutput,
_receiver_task: gpui::Task<()>,
+ channel_count: ChannelCount,
+ sample_rate: SampleRate,
}
impl LiveKitStream {
- pub fn new(executor: &gpui::BackgroundExecutor, track: &RemoteAudioTrack) -> Self {
+ pub fn new(
+ executor: &gpui::BackgroundExecutor,
+ track: &RemoteAudioTrack,
+ legacy: bool,
+ ) -> Self {
+ let (channel_count, sample_rate) = if legacy {
+ (LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE)
+ } else {
+ (CHANNEL_COUNT, SAMPLE_RATE)
+ };
+
let mut stream = NativeAudioStream::new(
track.rtc_track(),
- SAMPLE_RATE.get() as i32,
- CHANNEL_COUNT.get().into(),
+ sample_rate.get() as i32,
+ channel_count.get().into(),
);
let (queue_input, queue_output) = rodio::queue::queue(true);
// spawn rtc stream
@@ -45,6 +59,8 @@ impl LiveKitStream {
LiveKitStream {
_receiver_task: receiver_task,
inner: queue_output,
+ sample_rate,
+ channel_count,
}
}
}
@@ -63,17 +79,11 @@ impl Source for LiveKitStream {
}
fn channels(&self) -> rodio::ChannelCount {
- // This must be hardcoded because the playback source assumes constant
- // sample rate and channel count. The queue upon which this is build
- // will however report different counts and rates. Even though we put in
- // only items with our (constant) CHANNEL_COUNT & SAMPLE_RATE this will
- // play silence on one channel and at 44100 which is not what our
- // constants are.
- CHANNEL_COUNT
+ self.channel_count
}
fn sample_rate(&self) -> rodio::SampleRate {
- SAMPLE_RATE // see comment on channels
+ self.sample_rate
}
fn total_duration(&self) -> Option<std::time::Duration> {