diff --git a/Cargo.lock b/Cargo.lock index 7845cb1563c478485a96abfff8da1a3d2355599f..6cf3df6920cb873037639ea37b08275974273bd3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3880,9 +3880,9 @@ dependencies = [ [[package]] name = "core-graphics2" -version = "0.4.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e4583956b9806b69f73fcb23aee05eb3620efc282972f08f6a6db7504f8334d" +checksum = "4416167a69126e617f8d0a214af0e3c1dbdeffcb100ddf72dcd1a1ac9893c146" dependencies = [ "bitflags 2.9.4", "block", @@ -3914,9 +3914,9 @@ dependencies = [ [[package]] name = "core-video" -version = "0.4.3" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d45e71d5be22206bed53c3c3cb99315fc4c3d31b8963808c6bc4538168c4f8ef" +checksum = "139679cc63eb9504bdbe37e37874b0247136177655f0008588781e90863afa62" dependencies = [ "block", "core-foundation 0.10.0", @@ -10093,13 +10093,13 @@ dependencies = [ [[package]] name = "metal" -version = "0.29.0" +version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21" +checksum = "c7047791b5bc903b8cd963014b355f71dc9864a9a0b727057676c1dcae5cbc15" dependencies = [ "bitflags 2.9.4", "block", - "core-graphics-types 0.1.3", + "core-graphics-types 0.2.0", "foreign-types 0.5.0", "log", "objc", diff --git a/Cargo.toml b/Cargo.toml index 6f7af218b567d0d8eedb46fb03efd45bfa1b1cbd..76d2c7ed85e949475be5f3ac713026fadfc4b1af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -507,7 +507,7 @@ cocoa-foundation = "=0.2.0" convert_case = "0.8.0" core-foundation = "=0.10.0" core-foundation-sys = "0.8.6" -core-video = { version = "0.4.3", features = ["metal"] } +core-video = { version = "0.5.2", features = ["metal"] } cpal = "0.16" crash-handler = "0.6" criterion = { version = "0.5", features = ["html_reports"] } @@ -558,7 +558,7 @@ log = { version = "0.4.16", features = ["kv_unstable_serde", "serde"] } lsp-types = { git = "https://github.com/zed-industries/lsp-types", rev = "b71ab4eeb27d9758be8092020a46fe33fbca4e33" } mach2 = "0.5" markup5ever_rcdom = "0.3.0" -metal = "0.29" +metal = "0.33" minidumper = "0.8" moka = { version = "0.12.10", features = ["sync"] } naga = { version = "25.0", features = ["wgsl-in"] } diff --git a/crates/gpui/src/platform/mac/metal_atlas.rs b/crates/gpui/src/platform/mac/metal_atlas.rs index ad2e355bed440dd27ebf0e784a4b5cf9c83f111a..796d29669d38616ea54ff35410eec5260f0f582d 100644 --- a/crates/gpui/src/platform/mac/metal_atlas.rs +++ b/crates/gpui/src/platform/mac/metal_atlas.rs @@ -15,6 +15,9 @@ pub(crate) struct MetalAtlas(Mutex); impl MetalAtlas { pub(crate) fn new(device: Device) -> Self { MetalAtlas(Mutex::new(MetalAtlasState { + // Shared memory can be used only if CPU and GPU share the same memory space. + // https://developer.apple.com/documentation/metal/setting-resource-storage-modes + unified_memory: device.has_unified_memory(), device: AssertSend(device), monochrome_textures: Default::default(), polychrome_textures: Default::default(), @@ -29,6 +32,7 @@ impl MetalAtlas { struct MetalAtlasState { device: AssertSend, + unified_memory: bool, monochrome_textures: AtlasTextureList, polychrome_textures: AtlasTextureList, tiles_by_key: FxHashMap, @@ -149,6 +153,11 @@ impl MetalAtlasState { } texture_descriptor.set_pixel_format(pixel_format); texture_descriptor.set_usage(usage); + texture_descriptor.set_storage_mode(if self.unified_memory { + metal::MTLStorageMode::Shared + } else { + metal::MTLStorageMode::Managed + }); let metal_texture = self.device.new_texture(&texture_descriptor); let texture_list = match kind { diff --git a/crates/gpui/src/platform/mac/metal_renderer.rs b/crates/gpui/src/platform/mac/metal_renderer.rs index 07e7cdd2763226e92948e7b02b2c09383a0aa52f..9f92ab5379b24f73c58c2bc18ef711455fd62734 100644 --- a/crates/gpui/src/platform/mac/metal_renderer.rs +++ b/crates/gpui/src/platform/mac/metal_renderer.rs @@ -78,12 +78,22 @@ impl InstanceBufferPool { self.buffers.clear(); } - pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer { + pub(crate) fn acquire( + &mut self, + device: &metal::Device, + unified_memory: bool, + ) -> InstanceBuffer { let buffer = self.buffers.pop().unwrap_or_else(|| { - device.new_buffer( - self.buffer_size as u64, - MTLResourceOptions::StorageModeManaged, - ) + let options = if unified_memory { + MTLResourceOptions::StorageModeShared + // Buffers are write only which can benefit from the combined cache + // https://developer.apple.com/documentation/metal/mtlresourceoptions/cpucachemodewritecombined + | MTLResourceOptions::CPUCacheModeWriteCombined + } else { + MTLResourceOptions::StorageModeManaged + }; + + device.new_buffer(self.buffer_size as u64, options) }); InstanceBuffer { metal_buffer: buffer, @@ -101,6 +111,7 @@ impl InstanceBufferPool { pub(crate) struct MetalRenderer { device: metal::Device, layer: metal::MetalLayer, + unified_memory: bool, presents_with_transaction: bool, command_queue: CommandQueue, paths_rasterization_pipeline_state: metal::RenderPipelineState, @@ -186,6 +197,10 @@ impl MetalRenderer { output } + // Shared memory can be used only if CPU and GPU share the same memory space. + // https://developer.apple.com/documentation/metal/setting-resource-storage-modes + let unified_memory = device.has_unified_memory(); + let unit_vertices = [ to_float2_bits(point(0., 0.)), to_float2_bits(point(1., 0.)), @@ -197,7 +212,12 @@ impl MetalRenderer { let unit_vertices = device.new_buffer_with_data( unit_vertices.as_ptr() as *const c_void, mem::size_of_val(&unit_vertices) as u64, - MTLResourceOptions::StorageModeManaged, + if unified_memory { + MTLResourceOptions::StorageModeShared + | MTLResourceOptions::CPUCacheModeWriteCombined + } else { + MTLResourceOptions::StorageModeManaged + }, ); let paths_rasterization_pipeline_state = build_path_rasterization_pipeline_state( @@ -275,6 +295,7 @@ impl MetalRenderer { device, layer, presents_with_transaction: false, + unified_memory, command_queue, paths_rasterization_pipeline_state, path_sprites_pipeline_state, @@ -344,14 +365,23 @@ impl MetalRenderer { texture_descriptor.set_width(size.width.0 as u64); texture_descriptor.set_height(size.height.0 as u64); texture_descriptor.set_pixel_format(metal::MTLPixelFormat::BGRA8Unorm); + texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private); texture_descriptor .set_usage(metal::MTLTextureUsage::RenderTarget | metal::MTLTextureUsage::ShaderRead); self.path_intermediate_texture = Some(self.device.new_texture(&texture_descriptor)); if self.path_sample_count > 1 { + // https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus + // Rendering MSAA textures are done in a single pass, so we can use memory-less storage on Apple Silicon + let storage_mode = if self.unified_memory { + metal::MTLStorageMode::Memoryless + } else { + metal::MTLStorageMode::Private + }; + let mut msaa_descriptor = texture_descriptor; msaa_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample); - msaa_descriptor.set_storage_mode(metal::MTLStorageMode::Private); + msaa_descriptor.set_storage_mode(storage_mode); msaa_descriptor.set_sample_count(self.path_sample_count as _); self.path_intermediate_msaa_texture = Some(self.device.new_texture(&msaa_descriptor)); } else { @@ -385,7 +415,10 @@ impl MetalRenderer { }; loop { - let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device); + let mut instance_buffer = self + .instance_buffer_pool + .lock() + .acquire(&self.device, self.unified_memory); let command_buffer = self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size); @@ -449,7 +482,10 @@ impl MetalRenderer { .ok_or_else(|| anyhow::anyhow!("Failed to get drawable for render_to_image"))?; loop { - let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device); + let mut instance_buffer = self + .instance_buffer_pool + .lock() + .acquire(&self.device, self.unified_memory); let command_buffer = self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size); @@ -649,10 +685,14 @@ impl MetalRenderer { command_encoder.end_encoding(); - instance_buffer.metal_buffer.did_modify_range(NSRange { - location: 0, - length: instance_offset as NSUInteger, - }); + if !self.unified_memory { + // Sync the instance buffer to the GPU + instance_buffer.metal_buffer.did_modify_range(NSRange { + location: 0, + length: instance_offset as NSUInteger, + }); + } + Ok(command_buffer.to_owned()) }