From c8656ac96d2344fc288b551943cc12fcb6ef56ad Mon Sep 17 00:00:00 2001 From: Marco Mihai Condrache <52580954+marcocondrache@users.noreply.github.com> Date: Sat, 21 Feb 2026 16:53:12 +0100 Subject: [PATCH] gpui: Take advantage of unified memory on macOS (#49236) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Third attempt to land this improvement: (#45577, #44273) The previous PR didn’t work on Intel MacBooks because I made a wrong assumption about the unified memory check. `has_unified_memory` only tells us that the CPU and GPU share memory. It does not mean we’re running on an Apple GPU family. Memoryless textures are only supported on Apple GPUs. Some Intel Macs report unified memory, but they don’t support memoryless textures, which is why the previous change failed there. So instead of relying on unified memory, we now explicitly check that we’re running on an Apple GPU family before enabling memoryless textures. Before you mark this PR as ready for review, make sure that you have: - [ ] Added a solid test coverage and/or screenshots from doing manual testing - [X] Done a self-review taking into account security and performance aspects - [ ] Aligned any UI changes with the [UI checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist) Release Notes: - Reduced memory usage on macOS --------- Signed-off-by: Marco Mihai Condrache <52580954+marcocondrache@users.noreply.github.com> --- Cargo.lock | 33 +++-------- Cargo.toml | 4 +- crates/gpui_macos/src/metal_atlas.rs | 11 +++- crates/gpui_macos/src/metal_renderer.rs | 77 ++++++++++++++++++++----- 4 files changed, 83 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 90aa2b09ace5b9b99c3adda5d6989efe776d629d..0ceda16a94d59f5951c7b2bd691f8e88e6fa3279 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3755,9 +3755,9 @@ dependencies = [ [[package]] name = "core-graphics2" -version = "0.4.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e4583956b9806b69f73fcb23aee05eb3620efc282972f08f6a6db7504f8334d" +checksum = "4416167a69126e617f8d0a214af0e3c1dbdeffcb100ddf72dcd1a1ac9893c146" dependencies = [ "bitflags 2.10.0", "block", @@ -3789,16 +3789,16 @@ dependencies = [ [[package]] name = "core-video" -version = "0.4.3" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d45e71d5be22206bed53c3c3cb99315fc4c3d31b8963808c6bc4538168c4f8ef" +checksum = "139679cc63eb9504bdbe37e37874b0247136177655f0008588781e90863afa62" dependencies = [ "block", "core-foundation 0.10.0", "core-graphics2", "io-surface", "libc", - "metal 0.29.0", + "metal", ] [[package]] @@ -7407,7 +7407,7 @@ dependencies = [ "lyon", "mach2 0.5.0", "media", - "metal 0.29.0", + "metal", "naga", "num_cpus", "objc", @@ -7525,7 +7525,7 @@ dependencies = [ "log", "mach2 0.5.0", "media", - "metal 0.29.0", + "metal", "objc", "parking_lot", "pathfinder_geometry", @@ -10106,7 +10106,7 @@ dependencies = [ "core-video", "ctor", "foreign-types 0.5.0", - "metal 0.29.0", + "metal", "objc", ] @@ -10189,21 +10189,6 @@ dependencies = [ "ttf-parser 0.20.0", ] -[[package]] -name = "metal" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21" -dependencies = [ - "bitflags 2.10.0", - "block", - "core-graphics-types 0.1.3", - "foreign-types 0.5.0", - "log", - "objc", - "paste", -] - [[package]] name = "metal" version = "0.33.0" @@ -19634,7 +19619,7 @@ dependencies = [ "libc", "libloading", "log", - "metal 0.33.0", + "metal", "naga", "ndk-sys", "objc", diff --git a/Cargo.toml b/Cargo.toml index 6e726622b7f7ae245152f4c3803dbcb43db4d6e7..35e9ff00fbda09ff2f47a65aa5ee001f83a743cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -523,7 +523,7 @@ cocoa-foundation = "=0.2.0" convert_case = "0.8.0" core-foundation = "=0.10.0" core-foundation-sys = "0.8.6" -core-video = { version = "0.4.3", features = ["metal"] } +core-video = { version = "0.5.2", features = ["metal"] } cpal = "0.17" crash-handler = "0.6" criterion = { version = "0.5", features = ["html_reports"] } @@ -574,7 +574,7 @@ log = { version = "0.4.16", features = ["kv_unstable_serde", "serde"] } lsp-types = { git = "https://github.com/zed-industries/lsp-types", rev = "a4f410987660bf560d1e617cb78117c6b6b9f599" } mach2 = "0.5" markup5ever_rcdom = "0.3.0" -metal = "0.29" +metal = "0.33" minidumper = "0.8" moka = { version = "0.12.10", features = ["sync"] } naga = { version = "28.0", features = ["wgsl-in"] } diff --git a/crates/gpui_macos/src/metal_atlas.rs b/crates/gpui_macos/src/metal_atlas.rs index a741db75f77701f63407539979bdf3ecf08524dc..eacd9407fe2e447abbd05dc8cdb2e9f7660cf3cf 100644 --- a/crates/gpui_macos/src/metal_atlas.rs +++ b/crates/gpui_macos/src/metal_atlas.rs @@ -13,9 +13,10 @@ use std::borrow::Cow; pub(crate) struct MetalAtlas(Mutex); impl MetalAtlas { - pub(crate) fn new(device: Device) -> Self { + pub(crate) fn new(device: Device, is_apple_gpu: bool) -> Self { MetalAtlas(Mutex::new(MetalAtlasState { device: AssertSend(device), + is_apple_gpu, monochrome_textures: Default::default(), polychrome_textures: Default::default(), tiles_by_key: Default::default(), @@ -29,6 +30,7 @@ impl MetalAtlas { struct MetalAtlasState { device: AssertSend, + is_apple_gpu: bool, monochrome_textures: AtlasTextureList, polychrome_textures: AtlasTextureList, tiles_by_key: FxHashMap, @@ -149,6 +151,13 @@ impl MetalAtlasState { } texture_descriptor.set_pixel_format(pixel_format); texture_descriptor.set_usage(usage); + // Shared memory mode can be used only on Apple GPU families + // https://developer.apple.com/documentation/metal/mtlresourceoptions/storagemodeshared + texture_descriptor.set_storage_mode(if self.is_apple_gpu { + metal::MTLStorageMode::Shared + } else { + metal::MTLStorageMode::Managed + }); let metal_texture = self.device.new_texture(&texture_descriptor); let texture_list = match kind { diff --git a/crates/gpui_macos/src/metal_renderer.rs b/crates/gpui_macos/src/metal_renderer.rs index 17ed3543124c4f527c3bc75cff2e192c12003975..93e039019b1ca639118b5453ff8f9de0d30e4f99 100644 --- a/crates/gpui_macos/src/metal_renderer.rs +++ b/crates/gpui_macos/src/metal_renderer.rs @@ -21,7 +21,7 @@ use core_video::{ }; use foreign_types::{ForeignType, ForeignTypeRef}; use metal::{ - CAMetalLayer, CommandQueue, MTLPixelFormat, MTLResourceOptions, NSRange, + CAMetalLayer, CommandQueue, MTLGPUFamily, MTLPixelFormat, MTLResourceOptions, NSRange, RenderPassColorAttachmentDescriptorRef, }; use objc::{self, msg_send, sel, sel_impl}; @@ -78,12 +78,22 @@ impl InstanceBufferPool { self.buffers.clear(); } - pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer { + pub(crate) fn acquire( + &mut self, + device: &metal::Device, + unified_memory: bool, + ) -> InstanceBuffer { let buffer = self.buffers.pop().unwrap_or_else(|| { - device.new_buffer( - self.buffer_size as u64, - MTLResourceOptions::StorageModeManaged, - ) + let options = if unified_memory { + MTLResourceOptions::StorageModeShared + // Buffers are write only which can benefit from the combined cache + // https://developer.apple.com/documentation/metal/mtlresourceoptions/cpucachemodewritecombined + | MTLResourceOptions::CPUCacheModeWriteCombined + } else { + MTLResourceOptions::StorageModeManaged + }; + + device.new_buffer(self.buffer_size as u64, options) }); InstanceBuffer { metal_buffer: buffer, @@ -101,6 +111,8 @@ impl InstanceBufferPool { pub(crate) struct MetalRenderer { device: metal::Device, layer: metal::MetalLayer, + is_apple_gpu: bool, + is_unified_memory: bool, presents_with_transaction: bool, command_queue: CommandQueue, paths_rasterization_pipeline_state: metal::RenderPipelineState, @@ -186,6 +198,15 @@ impl MetalRenderer { output } + // Shared memory can be used only if CPU and GPU share the same memory space. + // https://developer.apple.com/documentation/metal/setting-resource-storage-modes + let is_unified_memory = device.has_unified_memory(); + // Apple GPU families support memoryless textures, which can significantly reduce + // memory usage by keeping render targets in on-chip tile memory instead of + // allocating backing store in system memory. + // https://developer.apple.com/documentation/metal/mtlgpufamily + let is_apple_gpu = device.supports_family(MTLGPUFamily::Apple1); + let unit_vertices = [ to_float2_bits(point(0., 0.)), to_float2_bits(point(1., 0.)), @@ -197,7 +218,12 @@ impl MetalRenderer { let unit_vertices = device.new_buffer_with_data( unit_vertices.as_ptr() as *const c_void, mem::size_of_val(&unit_vertices) as u64, - MTLResourceOptions::StorageModeManaged, + if is_unified_memory { + MTLResourceOptions::StorageModeShared + | MTLResourceOptions::CPUCacheModeWriteCombined + } else { + MTLResourceOptions::StorageModeManaged + }, ); let paths_rasterization_pipeline_state = build_path_rasterization_pipeline_state( @@ -267,7 +293,7 @@ impl MetalRenderer { ); let command_queue = device.new_command_queue(); - let sprite_atlas = Arc::new(MetalAtlas::new(device.clone())); + let sprite_atlas = Arc::new(MetalAtlas::new(device.clone(), is_apple_gpu)); let core_video_texture_cache = CVMetalTextureCache::new(None, device.clone(), None).unwrap(); @@ -275,6 +301,8 @@ impl MetalRenderer { device, layer, presents_with_transaction: false, + is_apple_gpu, + is_unified_memory, command_queue, paths_rasterization_pipeline_state, path_sprites_pipeline_state, @@ -344,14 +372,23 @@ impl MetalRenderer { texture_descriptor.set_width(size.width.0 as u64); texture_descriptor.set_height(size.height.0 as u64); texture_descriptor.set_pixel_format(metal::MTLPixelFormat::BGRA8Unorm); + texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private); texture_descriptor .set_usage(metal::MTLTextureUsage::RenderTarget | metal::MTLTextureUsage::ShaderRead); self.path_intermediate_texture = Some(self.device.new_texture(&texture_descriptor)); if self.path_sample_count > 1 { + // https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus + // Rendering MSAA textures are done in a single pass, so we can use memory-less storage on Apple Silicon + let storage_mode = if self.is_apple_gpu { + metal::MTLStorageMode::Memoryless + } else { + metal::MTLStorageMode::Private + }; + let msaa_descriptor = texture_descriptor; msaa_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample); - msaa_descriptor.set_storage_mode(metal::MTLStorageMode::Private); + msaa_descriptor.set_storage_mode(storage_mode); msaa_descriptor.set_sample_count(self.path_sample_count as _); self.path_intermediate_msaa_texture = Some(self.device.new_texture(&msaa_descriptor)); } else { @@ -385,7 +422,10 @@ impl MetalRenderer { }; loop { - let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device); + let mut instance_buffer = self + .instance_buffer_pool + .lock() + .acquire(&self.device, self.is_unified_memory); let command_buffer = self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size); @@ -449,7 +489,10 @@ impl MetalRenderer { .ok_or_else(|| anyhow::anyhow!("Failed to get drawable for render_to_image"))?; loop { - let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device); + let mut instance_buffer = self + .instance_buffer_pool + .lock() + .acquire(&self.device, self.is_unified_memory); let command_buffer = self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size); @@ -646,10 +689,14 @@ impl MetalRenderer { command_encoder.end_encoding(); - instance_buffer.metal_buffer.did_modify_range(NSRange { - location: 0, - length: instance_offset as NSUInteger, - }); + if !self.is_unified_memory { + // Sync the instance buffer to the GPU + instance_buffer.metal_buffer.did_modify_range(NSRange { + location: 0, + length: instance_offset as NSUInteger, + }); + } + Ok(command_buffer.to_owned()) }