@@ -15,6 +15,9 @@ pub(crate) struct MetalAtlas(Mutex<MetalAtlasState>);
impl MetalAtlas {
pub(crate) fn new(device: Device) -> Self {
MetalAtlas(Mutex::new(MetalAtlasState {
+ // Shared memory can be used only if CPU and GPU share the same memory space.
+ // https://developer.apple.com/documentation/metal/setting-resource-storage-modes
+ unified_memory: device.has_unified_memory(),
device: AssertSend(device),
monochrome_textures: Default::default(),
polychrome_textures: Default::default(),
@@ -29,6 +32,7 @@ impl MetalAtlas {
struct MetalAtlasState {
device: AssertSend<Device>,
+ unified_memory: bool,
monochrome_textures: AtlasTextureList<MetalAtlasTexture>,
polychrome_textures: AtlasTextureList<MetalAtlasTexture>,
tiles_by_key: FxHashMap<AtlasKey, AtlasTile>,
@@ -146,6 +150,11 @@ impl MetalAtlasState {
}
texture_descriptor.set_pixel_format(pixel_format);
texture_descriptor.set_usage(usage);
+ texture_descriptor.set_storage_mode(if self.unified_memory {
+ metal::MTLStorageMode::Shared
+ } else {
+ metal::MTLStorageMode::Managed
+ });
let metal_texture = self.device.new_texture(&texture_descriptor);
let texture_list = match kind {
@@ -76,12 +76,22 @@ impl InstanceBufferPool {
self.buffers.clear();
}
- pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer {
+ pub(crate) fn acquire(
+ &mut self,
+ device: &metal::Device,
+ unified_memory: bool,
+ ) -> InstanceBuffer {
let buffer = self.buffers.pop().unwrap_or_else(|| {
- device.new_buffer(
- self.buffer_size as u64,
- MTLResourceOptions::StorageModeManaged,
- )
+ let options = if unified_memory {
+ MTLResourceOptions::StorageModeShared
+ // Buffers are write only which can benefit from the combined cache
+ // https://developer.apple.com/documentation/metal/mtlresourceoptions/cpucachemodewritecombined
+ | MTLResourceOptions::CPUCacheModeWriteCombined
+ } else {
+ MTLResourceOptions::StorageModeManaged
+ };
+
+ device.new_buffer(self.buffer_size as u64, options)
});
InstanceBuffer {
metal_buffer: buffer,
@@ -99,6 +109,7 @@ impl InstanceBufferPool {
pub(crate) struct MetalRenderer {
device: metal::Device,
layer: metal::MetalLayer,
+ unified_memory: bool,
presents_with_transaction: bool,
command_queue: CommandQueue,
paths_rasterization_pipeline_state: metal::RenderPipelineState,
@@ -179,6 +190,10 @@ impl MetalRenderer {
output
}
+ // Shared memory can be used only if CPU and GPU share the same memory space.
+ // https://developer.apple.com/documentation/metal/setting-resource-storage-modes
+ let unified_memory = device.has_unified_memory();
+
let unit_vertices = [
to_float2_bits(point(0., 0.)),
to_float2_bits(point(1., 0.)),
@@ -190,7 +205,12 @@ impl MetalRenderer {
let unit_vertices = device.new_buffer_with_data(
unit_vertices.as_ptr() as *const c_void,
mem::size_of_val(&unit_vertices) as u64,
- MTLResourceOptions::StorageModeManaged,
+ if unified_memory {
+ MTLResourceOptions::StorageModeShared
+ | MTLResourceOptions::CPUCacheModeWriteCombined
+ } else {
+ MTLResourceOptions::StorageModeManaged
+ },
);
let paths_rasterization_pipeline_state = build_path_rasterization_pipeline_state(
@@ -268,6 +288,7 @@ impl MetalRenderer {
device,
layer,
presents_with_transaction: false,
+ unified_memory,
command_queue,
paths_rasterization_pipeline_state,
path_sprites_pipeline_state,
@@ -337,14 +358,23 @@ impl MetalRenderer {
texture_descriptor.set_width(size.width.0 as u64);
texture_descriptor.set_height(size.height.0 as u64);
texture_descriptor.set_pixel_format(metal::MTLPixelFormat::BGRA8Unorm);
+ texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
texture_descriptor
.set_usage(metal::MTLTextureUsage::RenderTarget | metal::MTLTextureUsage::ShaderRead);
self.path_intermediate_texture = Some(self.device.new_texture(&texture_descriptor));
if self.path_sample_count > 1 {
+ // https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus
+ // Rendering MSAA textures are done in a single pass, so we can use memory-less storage on Apple Silicon
+ let storage_mode = if self.unified_memory {
+ metal::MTLStorageMode::Memoryless
+ } else {
+ metal::MTLStorageMode::Private
+ };
+
let mut msaa_descriptor = texture_descriptor;
msaa_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample);
- msaa_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
+ msaa_descriptor.set_storage_mode(storage_mode);
msaa_descriptor.set_sample_count(self.path_sample_count as _);
self.path_intermediate_msaa_texture = Some(self.device.new_texture(&msaa_descriptor));
} else {
@@ -378,7 +408,10 @@ impl MetalRenderer {
};
loop {
- let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device);
+ let mut instance_buffer = self
+ .instance_buffer_pool
+ .lock()
+ .acquire(&self.device, self.unified_memory);
let command_buffer =
self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size);
@@ -550,10 +583,14 @@ impl MetalRenderer {
command_encoder.end_encoding();
- instance_buffer.metal_buffer.did_modify_range(NSRange {
- location: 0,
- length: instance_offset as NSUInteger,
- });
+ if !self.unified_memory {
+ // Sync the instance buffer to the GPU
+ instance_buffer.metal_buffer.did_modify_range(NSRange {
+ location: 0,
+ length: instance_offset as NSUInteger,
+ });
+ }
+
Ok(command_buffer.to_owned())
}