Detailed changes
@@ -3755,9 +3755,9 @@ dependencies = [
[[package]]
name = "core-graphics2"
-version = "0.4.1"
+version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e4583956b9806b69f73fcb23aee05eb3620efc282972f08f6a6db7504f8334d"
+checksum = "4416167a69126e617f8d0a214af0e3c1dbdeffcb100ddf72dcd1a1ac9893c146"
dependencies = [
"bitflags 2.10.0",
"block",
@@ -3789,16 +3789,16 @@ dependencies = [
[[package]]
name = "core-video"
-version = "0.4.3"
+version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d45e71d5be22206bed53c3c3cb99315fc4c3d31b8963808c6bc4538168c4f8ef"
+checksum = "139679cc63eb9504bdbe37e37874b0247136177655f0008588781e90863afa62"
dependencies = [
"block",
"core-foundation 0.10.0",
"core-graphics2",
"io-surface",
"libc",
- "metal 0.29.0",
+ "metal",
]
[[package]]
@@ -7407,7 +7407,7 @@ dependencies = [
"lyon",
"mach2 0.5.0",
"media",
- "metal 0.29.0",
+ "metal",
"naga",
"num_cpus",
"objc",
@@ -7525,7 +7525,7 @@ dependencies = [
"log",
"mach2 0.5.0",
"media",
- "metal 0.29.0",
+ "metal",
"objc",
"parking_lot",
"pathfinder_geometry",
@@ -10106,7 +10106,7 @@ dependencies = [
"core-video",
"ctor",
"foreign-types 0.5.0",
- "metal 0.29.0",
+ "metal",
"objc",
]
@@ -10189,21 +10189,6 @@ dependencies = [
"ttf-parser 0.20.0",
]
-[[package]]
-name = "metal"
-version = "0.29.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
-dependencies = [
- "bitflags 2.10.0",
- "block",
- "core-graphics-types 0.1.3",
- "foreign-types 0.5.0",
- "log",
- "objc",
- "paste",
-]
-
[[package]]
name = "metal"
version = "0.33.0"
@@ -19634,7 +19619,7 @@ dependencies = [
"libc",
"libloading",
"log",
- "metal 0.33.0",
+ "metal",
"naga",
"ndk-sys",
"objc",
@@ -523,7 +523,7 @@ cocoa-foundation = "=0.2.0"
convert_case = "0.8.0"
core-foundation = "=0.10.0"
core-foundation-sys = "0.8.6"
-core-video = { version = "0.4.3", features = ["metal"] }
+core-video = { version = "0.5.2", features = ["metal"] }
cpal = "0.17"
crash-handler = "0.6"
criterion = { version = "0.5", features = ["html_reports"] }
@@ -574,7 +574,7 @@ log = { version = "0.4.16", features = ["kv_unstable_serde", "serde"] }
lsp-types = { git = "https://github.com/zed-industries/lsp-types", rev = "a4f410987660bf560d1e617cb78117c6b6b9f599" }
mach2 = "0.5"
markup5ever_rcdom = "0.3.0"
-metal = "0.29"
+metal = "0.33"
minidumper = "0.8"
moka = { version = "0.12.10", features = ["sync"] }
naga = { version = "28.0", features = ["wgsl-in"] }
@@ -13,9 +13,10 @@ use std::borrow::Cow;
pub(crate) struct MetalAtlas(Mutex<MetalAtlasState>);
impl MetalAtlas {
- pub(crate) fn new(device: Device) -> Self {
+ pub(crate) fn new(device: Device, is_apple_gpu: bool) -> Self {
MetalAtlas(Mutex::new(MetalAtlasState {
device: AssertSend(device),
+ is_apple_gpu,
monochrome_textures: Default::default(),
polychrome_textures: Default::default(),
tiles_by_key: Default::default(),
@@ -29,6 +30,7 @@ impl MetalAtlas {
struct MetalAtlasState {
device: AssertSend<Device>,
+ is_apple_gpu: bool,
monochrome_textures: AtlasTextureList<MetalAtlasTexture>,
polychrome_textures: AtlasTextureList<MetalAtlasTexture>,
tiles_by_key: FxHashMap<AtlasKey, AtlasTile>,
@@ -149,6 +151,13 @@ impl MetalAtlasState {
}
texture_descriptor.set_pixel_format(pixel_format);
texture_descriptor.set_usage(usage);
+ // Shared memory mode can be used only on Apple GPU families
+ // https://developer.apple.com/documentation/metal/mtlresourceoptions/storagemodeshared
+ texture_descriptor.set_storage_mode(if self.is_apple_gpu {
+ metal::MTLStorageMode::Shared
+ } else {
+ metal::MTLStorageMode::Managed
+ });
let metal_texture = self.device.new_texture(&texture_descriptor);
let texture_list = match kind {
@@ -21,7 +21,7 @@ use core_video::{
};
use foreign_types::{ForeignType, ForeignTypeRef};
use metal::{
- CAMetalLayer, CommandQueue, MTLPixelFormat, MTLResourceOptions, NSRange,
+ CAMetalLayer, CommandQueue, MTLGPUFamily, MTLPixelFormat, MTLResourceOptions, NSRange,
RenderPassColorAttachmentDescriptorRef,
};
use objc::{self, msg_send, sel, sel_impl};
@@ -78,12 +78,22 @@ impl InstanceBufferPool {
self.buffers.clear();
}
- pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer {
+ pub(crate) fn acquire(
+ &mut self,
+ device: &metal::Device,
+ unified_memory: bool,
+ ) -> InstanceBuffer {
let buffer = self.buffers.pop().unwrap_or_else(|| {
- device.new_buffer(
- self.buffer_size as u64,
- MTLResourceOptions::StorageModeManaged,
- )
+ let options = if unified_memory {
+ MTLResourceOptions::StorageModeShared
+ // Buffers are write only which can benefit from the combined cache
+ // https://developer.apple.com/documentation/metal/mtlresourceoptions/cpucachemodewritecombined
+ | MTLResourceOptions::CPUCacheModeWriteCombined
+ } else {
+ MTLResourceOptions::StorageModeManaged
+ };
+
+ device.new_buffer(self.buffer_size as u64, options)
});
InstanceBuffer {
metal_buffer: buffer,
@@ -101,6 +111,8 @@ impl InstanceBufferPool {
pub(crate) struct MetalRenderer {
device: metal::Device,
layer: metal::MetalLayer,
+ is_apple_gpu: bool,
+ is_unified_memory: bool,
presents_with_transaction: bool,
command_queue: CommandQueue,
paths_rasterization_pipeline_state: metal::RenderPipelineState,
@@ -186,6 +198,15 @@ impl MetalRenderer {
output
}
+ // Shared memory can be used only if CPU and GPU share the same memory space.
+ // https://developer.apple.com/documentation/metal/setting-resource-storage-modes
+ let is_unified_memory = device.has_unified_memory();
+ // Apple GPU families support memoryless textures, which can significantly reduce
+ // memory usage by keeping render targets in on-chip tile memory instead of
+ // allocating backing store in system memory.
+ // https://developer.apple.com/documentation/metal/mtlgpufamily
+ let is_apple_gpu = device.supports_family(MTLGPUFamily::Apple1);
+
let unit_vertices = [
to_float2_bits(point(0., 0.)),
to_float2_bits(point(1., 0.)),
@@ -197,7 +218,12 @@ impl MetalRenderer {
let unit_vertices = device.new_buffer_with_data(
unit_vertices.as_ptr() as *const c_void,
mem::size_of_val(&unit_vertices) as u64,
- MTLResourceOptions::StorageModeManaged,
+ if is_unified_memory {
+ MTLResourceOptions::StorageModeShared
+ | MTLResourceOptions::CPUCacheModeWriteCombined
+ } else {
+ MTLResourceOptions::StorageModeManaged
+ },
);
let paths_rasterization_pipeline_state = build_path_rasterization_pipeline_state(
@@ -267,7 +293,7 @@ impl MetalRenderer {
);
let command_queue = device.new_command_queue();
- let sprite_atlas = Arc::new(MetalAtlas::new(device.clone()));
+ let sprite_atlas = Arc::new(MetalAtlas::new(device.clone(), is_apple_gpu));
let core_video_texture_cache =
CVMetalTextureCache::new(None, device.clone(), None).unwrap();
@@ -275,6 +301,8 @@ impl MetalRenderer {
device,
layer,
presents_with_transaction: false,
+ is_apple_gpu,
+ is_unified_memory,
command_queue,
paths_rasterization_pipeline_state,
path_sprites_pipeline_state,
@@ -344,14 +372,23 @@ impl MetalRenderer {
texture_descriptor.set_width(size.width.0 as u64);
texture_descriptor.set_height(size.height.0 as u64);
texture_descriptor.set_pixel_format(metal::MTLPixelFormat::BGRA8Unorm);
+ texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
texture_descriptor
.set_usage(metal::MTLTextureUsage::RenderTarget | metal::MTLTextureUsage::ShaderRead);
self.path_intermediate_texture = Some(self.device.new_texture(&texture_descriptor));
if self.path_sample_count > 1 {
+ // https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus
+ // Rendering MSAA textures are done in a single pass, so we can use memory-less storage on Apple Silicon
+ let storage_mode = if self.is_apple_gpu {
+ metal::MTLStorageMode::Memoryless
+ } else {
+ metal::MTLStorageMode::Private
+ };
+
let msaa_descriptor = texture_descriptor;
msaa_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample);
- msaa_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
+ msaa_descriptor.set_storage_mode(storage_mode);
msaa_descriptor.set_sample_count(self.path_sample_count as _);
self.path_intermediate_msaa_texture = Some(self.device.new_texture(&msaa_descriptor));
} else {
@@ -385,7 +422,10 @@ impl MetalRenderer {
};
loop {
- let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device);
+ let mut instance_buffer = self
+ .instance_buffer_pool
+ .lock()
+ .acquire(&self.device, self.is_unified_memory);
let command_buffer =
self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size);
@@ -449,7 +489,10 @@ impl MetalRenderer {
.ok_or_else(|| anyhow::anyhow!("Failed to get drawable for render_to_image"))?;
loop {
- let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device);
+ let mut instance_buffer = self
+ .instance_buffer_pool
+ .lock()
+ .acquire(&self.device, self.is_unified_memory);
let command_buffer =
self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size);
@@ -646,10 +689,14 @@ impl MetalRenderer {
command_encoder.end_encoding();
- instance_buffer.metal_buffer.did_modify_range(NSRange {
- location: 0,
- length: instance_offset as NSUInteger,
- });
+ if !self.is_unified_memory {
+ // Sync the instance buffer to the GPU
+ instance_buffer.metal_buffer.did_modify_range(NSRange {
+ location: 0,
+ length: instance_offset as NSUInteger,
+ });
+ }
+
Ok(command_buffer.to_owned())
}