diff --git a/Cargo.lock b/Cargo.lock index 90aa2b09ace5b9b99c3adda5d6989efe776d629d..0ceda16a94d59f5951c7b2bd691f8e88e6fa3279 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3755,9 +3755,9 @@ dependencies = [ [[package]] name = "core-graphics2" -version = "0.4.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e4583956b9806b69f73fcb23aee05eb3620efc282972f08f6a6db7504f8334d" +checksum = "4416167a69126e617f8d0a214af0e3c1dbdeffcb100ddf72dcd1a1ac9893c146" dependencies = [ "bitflags 2.10.0", "block", @@ -3789,16 +3789,16 @@ dependencies = [ [[package]] name = "core-video" -version = "0.4.3" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d45e71d5be22206bed53c3c3cb99315fc4c3d31b8963808c6bc4538168c4f8ef" +checksum = "139679cc63eb9504bdbe37e37874b0247136177655f0008588781e90863afa62" dependencies = [ "block", "core-foundation 0.10.0", "core-graphics2", "io-surface", "libc", - "metal 0.29.0", + "metal", ] [[package]] @@ -7407,7 +7407,7 @@ dependencies = [ "lyon", "mach2 0.5.0", "media", - "metal 0.29.0", + "metal", "naga", "num_cpus", "objc", @@ -7525,7 +7525,7 @@ dependencies = [ "log", "mach2 0.5.0", "media", - "metal 0.29.0", + "metal", "objc", "parking_lot", "pathfinder_geometry", @@ -10106,7 +10106,7 @@ dependencies = [ "core-video", "ctor", "foreign-types 0.5.0", - "metal 0.29.0", + "metal", "objc", ] @@ -10189,21 +10189,6 @@ dependencies = [ "ttf-parser 0.20.0", ] -[[package]] -name = "metal" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21" -dependencies = [ - "bitflags 2.10.0", - "block", - "core-graphics-types 0.1.3", - "foreign-types 0.5.0", - "log", - "objc", - "paste", -] - [[package]] name = "metal" version = "0.33.0" @@ -19634,7 +19619,7 @@ dependencies = [ "libc", "libloading", "log", - "metal 0.33.0", + "metal", "naga", "ndk-sys", "objc", diff --git a/Cargo.toml b/Cargo.toml index 6e726622b7f7ae245152f4c3803dbcb43db4d6e7..35e9ff00fbda09ff2f47a65aa5ee001f83a743cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -523,7 +523,7 @@ cocoa-foundation = "=0.2.0" convert_case = "0.8.0" core-foundation = "=0.10.0" core-foundation-sys = "0.8.6" -core-video = { version = "0.4.3", features = ["metal"] } +core-video = { version = "0.5.2", features = ["metal"] } cpal = "0.17" crash-handler = "0.6" criterion = { version = "0.5", features = ["html_reports"] } @@ -574,7 +574,7 @@ log = { version = "0.4.16", features = ["kv_unstable_serde", "serde"] } lsp-types = { git = "https://github.com/zed-industries/lsp-types", rev = "a4f410987660bf560d1e617cb78117c6b6b9f599" } mach2 = "0.5" markup5ever_rcdom = "0.3.0" -metal = "0.29" +metal = "0.33" minidumper = "0.8" moka = { version = "0.12.10", features = ["sync"] } naga = { version = "28.0", features = ["wgsl-in"] } diff --git a/crates/gpui_macos/src/metal_atlas.rs b/crates/gpui_macos/src/metal_atlas.rs index a741db75f77701f63407539979bdf3ecf08524dc..eacd9407fe2e447abbd05dc8cdb2e9f7660cf3cf 100644 --- a/crates/gpui_macos/src/metal_atlas.rs +++ b/crates/gpui_macos/src/metal_atlas.rs @@ -13,9 +13,10 @@ use std::borrow::Cow; pub(crate) struct MetalAtlas(Mutex); impl MetalAtlas { - pub(crate) fn new(device: Device) -> Self { + pub(crate) fn new(device: Device, is_apple_gpu: bool) -> Self { MetalAtlas(Mutex::new(MetalAtlasState { device: AssertSend(device), + is_apple_gpu, monochrome_textures: Default::default(), polychrome_textures: Default::default(), tiles_by_key: Default::default(), @@ -29,6 +30,7 @@ impl MetalAtlas { struct MetalAtlasState { device: AssertSend, + is_apple_gpu: bool, monochrome_textures: AtlasTextureList, polychrome_textures: AtlasTextureList, tiles_by_key: FxHashMap, @@ -149,6 +151,13 @@ impl MetalAtlasState { } texture_descriptor.set_pixel_format(pixel_format); texture_descriptor.set_usage(usage); + // Shared memory mode can be used only on Apple GPU families + // https://developer.apple.com/documentation/metal/mtlresourceoptions/storagemodeshared + texture_descriptor.set_storage_mode(if self.is_apple_gpu { + metal::MTLStorageMode::Shared + } else { + metal::MTLStorageMode::Managed + }); let metal_texture = self.device.new_texture(&texture_descriptor); let texture_list = match kind { diff --git a/crates/gpui_macos/src/metal_renderer.rs b/crates/gpui_macos/src/metal_renderer.rs index 17ed3543124c4f527c3bc75cff2e192c12003975..93e039019b1ca639118b5453ff8f9de0d30e4f99 100644 --- a/crates/gpui_macos/src/metal_renderer.rs +++ b/crates/gpui_macos/src/metal_renderer.rs @@ -21,7 +21,7 @@ use core_video::{ }; use foreign_types::{ForeignType, ForeignTypeRef}; use metal::{ - CAMetalLayer, CommandQueue, MTLPixelFormat, MTLResourceOptions, NSRange, + CAMetalLayer, CommandQueue, MTLGPUFamily, MTLPixelFormat, MTLResourceOptions, NSRange, RenderPassColorAttachmentDescriptorRef, }; use objc::{self, msg_send, sel, sel_impl}; @@ -78,12 +78,22 @@ impl InstanceBufferPool { self.buffers.clear(); } - pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer { + pub(crate) fn acquire( + &mut self, + device: &metal::Device, + unified_memory: bool, + ) -> InstanceBuffer { let buffer = self.buffers.pop().unwrap_or_else(|| { - device.new_buffer( - self.buffer_size as u64, - MTLResourceOptions::StorageModeManaged, - ) + let options = if unified_memory { + MTLResourceOptions::StorageModeShared + // Buffers are write only which can benefit from the combined cache + // https://developer.apple.com/documentation/metal/mtlresourceoptions/cpucachemodewritecombined + | MTLResourceOptions::CPUCacheModeWriteCombined + } else { + MTLResourceOptions::StorageModeManaged + }; + + device.new_buffer(self.buffer_size as u64, options) }); InstanceBuffer { metal_buffer: buffer, @@ -101,6 +111,8 @@ impl InstanceBufferPool { pub(crate) struct MetalRenderer { device: metal::Device, layer: metal::MetalLayer, + is_apple_gpu: bool, + is_unified_memory: bool, presents_with_transaction: bool, command_queue: CommandQueue, paths_rasterization_pipeline_state: metal::RenderPipelineState, @@ -186,6 +198,15 @@ impl MetalRenderer { output } + // Shared memory can be used only if CPU and GPU share the same memory space. + // https://developer.apple.com/documentation/metal/setting-resource-storage-modes + let is_unified_memory = device.has_unified_memory(); + // Apple GPU families support memoryless textures, which can significantly reduce + // memory usage by keeping render targets in on-chip tile memory instead of + // allocating backing store in system memory. + // https://developer.apple.com/documentation/metal/mtlgpufamily + let is_apple_gpu = device.supports_family(MTLGPUFamily::Apple1); + let unit_vertices = [ to_float2_bits(point(0., 0.)), to_float2_bits(point(1., 0.)), @@ -197,7 +218,12 @@ impl MetalRenderer { let unit_vertices = device.new_buffer_with_data( unit_vertices.as_ptr() as *const c_void, mem::size_of_val(&unit_vertices) as u64, - MTLResourceOptions::StorageModeManaged, + if is_unified_memory { + MTLResourceOptions::StorageModeShared + | MTLResourceOptions::CPUCacheModeWriteCombined + } else { + MTLResourceOptions::StorageModeManaged + }, ); let paths_rasterization_pipeline_state = build_path_rasterization_pipeline_state( @@ -267,7 +293,7 @@ impl MetalRenderer { ); let command_queue = device.new_command_queue(); - let sprite_atlas = Arc::new(MetalAtlas::new(device.clone())); + let sprite_atlas = Arc::new(MetalAtlas::new(device.clone(), is_apple_gpu)); let core_video_texture_cache = CVMetalTextureCache::new(None, device.clone(), None).unwrap(); @@ -275,6 +301,8 @@ impl MetalRenderer { device, layer, presents_with_transaction: false, + is_apple_gpu, + is_unified_memory, command_queue, paths_rasterization_pipeline_state, path_sprites_pipeline_state, @@ -344,14 +372,23 @@ impl MetalRenderer { texture_descriptor.set_width(size.width.0 as u64); texture_descriptor.set_height(size.height.0 as u64); texture_descriptor.set_pixel_format(metal::MTLPixelFormat::BGRA8Unorm); + texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private); texture_descriptor .set_usage(metal::MTLTextureUsage::RenderTarget | metal::MTLTextureUsage::ShaderRead); self.path_intermediate_texture = Some(self.device.new_texture(&texture_descriptor)); if self.path_sample_count > 1 { + // https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus + // Rendering MSAA textures are done in a single pass, so we can use memory-less storage on Apple Silicon + let storage_mode = if self.is_apple_gpu { + metal::MTLStorageMode::Memoryless + } else { + metal::MTLStorageMode::Private + }; + let msaa_descriptor = texture_descriptor; msaa_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample); - msaa_descriptor.set_storage_mode(metal::MTLStorageMode::Private); + msaa_descriptor.set_storage_mode(storage_mode); msaa_descriptor.set_sample_count(self.path_sample_count as _); self.path_intermediate_msaa_texture = Some(self.device.new_texture(&msaa_descriptor)); } else { @@ -385,7 +422,10 @@ impl MetalRenderer { }; loop { - let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device); + let mut instance_buffer = self + .instance_buffer_pool + .lock() + .acquire(&self.device, self.is_unified_memory); let command_buffer = self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size); @@ -449,7 +489,10 @@ impl MetalRenderer { .ok_or_else(|| anyhow::anyhow!("Failed to get drawable for render_to_image"))?; loop { - let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device); + let mut instance_buffer = self + .instance_buffer_pool + .lock() + .acquire(&self.device, self.is_unified_memory); let command_buffer = self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size); @@ -646,10 +689,14 @@ impl MetalRenderer { command_encoder.end_encoding(); - instance_buffer.metal_buffer.did_modify_range(NSRange { - location: 0, - length: instance_offset as NSUInteger, - }); + if !self.is_unified_memory { + // Sync the instance buffer to the GPU + instance_buffer.metal_buffer.did_modify_range(NSRange { + location: 0, + length: instance_offset as NSUInteger, + }); + } + Ok(command_buffer.to_owned()) }