From 2441dc3f6637431a781ae10b2e1aa8c4704b9502 Mon Sep 17 00:00:00 2001 From: Marco Mihai Condrache <52580954+marcocondrache@users.noreply.github.com> Date: Mon, 15 Dec 2025 21:33:15 +0100 Subject: [PATCH] gpui: Take advantage of unified memory on Apple silicon (#44273) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Metal chooses a buffer’s default storage mode based on the type of GPU in use. On Apple GPUs, the default mode is shared, which allows the CPU and GPU to access the same memory without requiring explicit synchronization. On discrete or external GPUs, Metal instead defaults to managed storage, which does require explicit CPU–GPU memory synchronization. This change aligns our buffer usage with Metal’s default behavior and avoids unnecessary synchronization on Apple-silicon Macs. As a result, memory usage on Apple hardware is reduced and performance improves due to fewer sync operations. Ref: https://developer.apple.com/documentation/metal/setting-resource-storage-modes Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos With the storage mode: image On main branch: image That's a 44% reduction of memory usage. Release Notes: - Reduced memory usage on Apple-silicon Macs by using shared memory where appropriate --------- Signed-off-by: Marco Mihai Condrache <52580954+marcocondrache@users.noreply.github.com> --- crates/gpui/src/platform/mac/metal_atlas.rs | 9 +++ .../gpui/src/platform/mac/metal_renderer.rs | 61 +++++++++++++++---- 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/crates/gpui/src/platform/mac/metal_atlas.rs b/crates/gpui/src/platform/mac/metal_atlas.rs index 8282530c5efdc13ca95a1f04c0f6ef1a23c8366c..9b43efe361a0816e32e858a44cafec66c42e7f85 100644 --- a/crates/gpui/src/platform/mac/metal_atlas.rs +++ b/crates/gpui/src/platform/mac/metal_atlas.rs @@ -15,6 +15,9 @@ pub(crate) struct MetalAtlas(Mutex); impl MetalAtlas { pub(crate) fn new(device: Device) -> Self { MetalAtlas(Mutex::new(MetalAtlasState { + // Shared memory can be used only if CPU and GPU share the same memory space. + // https://developer.apple.com/documentation/metal/setting-resource-storage-modes + unified_memory: device.has_unified_memory(), device: AssertSend(device), monochrome_textures: Default::default(), polychrome_textures: Default::default(), @@ -29,6 +32,7 @@ impl MetalAtlas { struct MetalAtlasState { device: AssertSend, + unified_memory: bool, monochrome_textures: AtlasTextureList, polychrome_textures: AtlasTextureList, tiles_by_key: FxHashMap, @@ -146,6 +150,11 @@ impl MetalAtlasState { } texture_descriptor.set_pixel_format(pixel_format); texture_descriptor.set_usage(usage); + texture_descriptor.set_storage_mode(if self.unified_memory { + metal::MTLStorageMode::Shared + } else { + metal::MTLStorageMode::Managed + }); let metal_texture = self.device.new_texture(&texture_descriptor); let texture_list = match kind { diff --git a/crates/gpui/src/platform/mac/metal_renderer.rs b/crates/gpui/src/platform/mac/metal_renderer.rs index 550041a0ccb4cd39bc7a86317d9540e806af2a28..6d7b82507fb581ec1f124e153e5bb91d3eaf9d25 100644 --- a/crates/gpui/src/platform/mac/metal_renderer.rs +++ b/crates/gpui/src/platform/mac/metal_renderer.rs @@ -76,12 +76,22 @@ impl InstanceBufferPool { self.buffers.clear(); } - pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer { + pub(crate) fn acquire( + &mut self, + device: &metal::Device, + unified_memory: bool, + ) -> InstanceBuffer { let buffer = self.buffers.pop().unwrap_or_else(|| { - device.new_buffer( - self.buffer_size as u64, - MTLResourceOptions::StorageModeManaged, - ) + let options = if unified_memory { + MTLResourceOptions::StorageModeShared + // Buffers are write only which can benefit from the combined cache + // https://developer.apple.com/documentation/metal/mtlresourceoptions/cpucachemodewritecombined + | MTLResourceOptions::CPUCacheModeWriteCombined + } else { + MTLResourceOptions::StorageModeManaged + }; + + device.new_buffer(self.buffer_size as u64, options) }); InstanceBuffer { metal_buffer: buffer, @@ -99,6 +109,7 @@ impl InstanceBufferPool { pub(crate) struct MetalRenderer { device: metal::Device, layer: metal::MetalLayer, + unified_memory: bool, presents_with_transaction: bool, command_queue: CommandQueue, paths_rasterization_pipeline_state: metal::RenderPipelineState, @@ -179,6 +190,10 @@ impl MetalRenderer { output } + // Shared memory can be used only if CPU and GPU share the same memory space. + // https://developer.apple.com/documentation/metal/setting-resource-storage-modes + let unified_memory = device.has_unified_memory(); + let unit_vertices = [ to_float2_bits(point(0., 0.)), to_float2_bits(point(1., 0.)), @@ -190,7 +205,12 @@ impl MetalRenderer { let unit_vertices = device.new_buffer_with_data( unit_vertices.as_ptr() as *const c_void, mem::size_of_val(&unit_vertices) as u64, - MTLResourceOptions::StorageModeManaged, + if unified_memory { + MTLResourceOptions::StorageModeShared + | MTLResourceOptions::CPUCacheModeWriteCombined + } else { + MTLResourceOptions::StorageModeManaged + }, ); let paths_rasterization_pipeline_state = build_path_rasterization_pipeline_state( @@ -268,6 +288,7 @@ impl MetalRenderer { device, layer, presents_with_transaction: false, + unified_memory, command_queue, paths_rasterization_pipeline_state, path_sprites_pipeline_state, @@ -337,14 +358,23 @@ impl MetalRenderer { texture_descriptor.set_width(size.width.0 as u64); texture_descriptor.set_height(size.height.0 as u64); texture_descriptor.set_pixel_format(metal::MTLPixelFormat::BGRA8Unorm); + texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private); texture_descriptor .set_usage(metal::MTLTextureUsage::RenderTarget | metal::MTLTextureUsage::ShaderRead); self.path_intermediate_texture = Some(self.device.new_texture(&texture_descriptor)); if self.path_sample_count > 1 { + // https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus + // Rendering MSAA textures are done in a single pass, so we can use memory-less storage on Apple Silicon + let storage_mode = if self.unified_memory { + metal::MTLStorageMode::Memoryless + } else { + metal::MTLStorageMode::Private + }; + let mut msaa_descriptor = texture_descriptor; msaa_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample); - msaa_descriptor.set_storage_mode(metal::MTLStorageMode::Private); + msaa_descriptor.set_storage_mode(storage_mode); msaa_descriptor.set_sample_count(self.path_sample_count as _); self.path_intermediate_msaa_texture = Some(self.device.new_texture(&msaa_descriptor)); } else { @@ -378,7 +408,10 @@ impl MetalRenderer { }; loop { - let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device); + let mut instance_buffer = self + .instance_buffer_pool + .lock() + .acquire(&self.device, self.unified_memory); let command_buffer = self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size); @@ -550,10 +583,14 @@ impl MetalRenderer { command_encoder.end_encoding(); - instance_buffer.metal_buffer.did_modify_range(NSRange { - location: 0, - length: instance_offset as NSUInteger, - }); + if !self.unified_memory { + // Sync the instance buffer to the GPU + instance_buffer.metal_buffer.did_modify_range(NSRange { + location: 0, + length: instance_offset as NSUInteger, + }); + } + Ok(command_buffer.to_owned()) }