gpui: Provide workaround for AMD Linux driver bug (#26890)

Martin Fischer created

There is apparently an amdgpu/radv bug where rendering with
multisample anti-aliasing (MSAA) crashes when the bounds
of a triangle list exceed 1024px, which in Zed happens with the default
buffer font size when you select a line with more than 144 characters.

This crash has been reported as #26143.

This commit introduces a workaround: you can set the
ZED_PATH_SAMPLE_COUNT=0
environment variable to disable MSAA. In addition, the error message we print
when a GPU crash is encountered with radv now suggests trying this
environment variable as a workaround and links to the respective issue.

Sidenote: MSAA was introduced in
f08b1d78ec1655a56b07c5efcd74d225d2a4b7d5
so you didn't run into this driver bug with versions < 0.173.8.

Release Notes:

- Added a workaround for an AMD Linux driver bug that causes Zed to
crash when selecting long lines.

Change summary

crates/gpui/src/platform/blade/blade_renderer.rs | 29 ++++++++++++++---
docs/src/linux.md                                |  1 
2 files changed, 24 insertions(+), 6 deletions(-)

Detailed changes

crates/gpui/src/platform/blade/blade_renderer.rs 🔗

@@ -18,7 +18,7 @@ use std::{mem, sync::Arc};
 const MAX_FRAME_TIME_MS: u32 = 10000;
 // Use 4x MSAA, all devices support it.
 // https://developer.apple.com/documentation/metal/mtldevice/1433355-supportstexturesamplecount
-const PATH_SAMPLE_COUNT: u32 = 4;
+const DEFAULT_PATH_SAMPLE_COUNT: u32 = 4;
 
 #[repr(C)]
 #[derive(Clone, Copy, Pod, Zeroable)]
@@ -130,7 +130,7 @@ struct BladePipelines {
 }
 
 impl BladePipelines {
-    fn new(gpu: &gpu::Context, surface_info: gpu::SurfaceInfo) -> Self {
+    fn new(gpu: &gpu::Context, surface_info: gpu::SurfaceInfo, path_sample_count: u32) -> Self {
         use gpu::ShaderData as _;
 
         log::info!(
@@ -211,7 +211,7 @@ impl BladePipelines {
                     write_mask: gpu::ColorWrites::default(),
                 }],
                 multisample_state: gpu::MultisampleState {
-                    sample_count: PATH_SAMPLE_COUNT,
+                    sample_count: path_sample_count,
                     ..Default::default()
                 },
             }),
@@ -322,6 +322,7 @@ pub struct BladeRenderer {
     atlas_sampler: gpu::Sampler,
     #[cfg(target_os = "macos")]
     core_video_texture_cache: CVMetalTextureCache,
+    path_sample_count: u32,
 }
 
 impl BladeRenderer {
@@ -347,13 +348,18 @@ impl BladeRenderer {
             name: "main",
             buffer_count: 2,
         });
-        let pipelines = BladePipelines::new(&context.gpu, surface.info());
+        // workaround for https://github.com/zed-industries/zed/issues/26143
+        let path_sample_count = std::env::var("ZED_PATH_SAMPLE_COUNT")
+            .ok()
+            .and_then(|v| v.parse().ok())
+            .unwrap_or(DEFAULT_PATH_SAMPLE_COUNT);
+        let pipelines = BladePipelines::new(&context.gpu, surface.info(), path_sample_count);
         let instance_belt = BufferBelt::new(BufferBeltDescriptor {
             memory: gpu::Memory::Shared,
             min_chunk_size: 0x1000,
             alignment: 0x40, // Vulkan `minStorageBufferOffsetAlignment` on Intel Xe
         });
-        let atlas = Arc::new(BladeAtlas::new(&context.gpu, PATH_SAMPLE_COUNT));
+        let atlas = Arc::new(BladeAtlas::new(&context.gpu, path_sample_count));
         let atlas_sampler = context.gpu.create_sampler(gpu::SamplerDesc {
             name: "atlas",
             mag_filter: gpu::FilterMode::Linear,
@@ -382,6 +388,7 @@ impl BladeRenderer {
             atlas_sampler,
             #[cfg(target_os = "macos")]
             core_video_texture_cache,
+            path_sample_count,
         })
     }
 
@@ -389,6 +396,15 @@ impl BladeRenderer {
         if let Some(last_sp) = self.last_sync_point.take() {
             if !self.gpu.wait_for(&last_sp, MAX_FRAME_TIME_MS) {
                 log::error!("GPU hung");
+                #[cfg(target_os = "linux")]
+                if self.gpu.device_information().driver_name == "radv" {
+                    log::error!("there's a known bug with amdgpu/radv, try setting ZED_PATH_SAMPLE_COUNT=0 as a workaround");
+                    log::error!("if that helps you're running into https://github.com/zed-industries/zed/issues/26143");
+                }
+                log::error!(
+                    "your device information is: {:?}",
+                    self.gpu.device_information()
+                );
                 while !self.gpu.wait_for(&last_sp, MAX_FRAME_TIME_MS) {}
             }
         }
@@ -428,7 +444,8 @@ impl BladeRenderer {
             self.gpu
                 .reconfigure_surface(&mut self.surface, self.surface_config);
             self.pipelines.destroy(&self.gpu);
-            self.pipelines = BladePipelines::new(&self.gpu, self.surface.info());
+            self.pipelines =
+                BladePipelines::new(&self.gpu, self.surface.info(), self.path_sample_count);
         }
     }
 

docs/src/linux.md 🔗

@@ -111,6 +111,7 @@ On some systems the file `/etc/prime-discrete` can be used to enforce the use of
 
 On others, you may be able to use the environment variable `DRI_PRIME=1` when running Zed to force the use of the discrete GPU.
 
+If you're using an AMD GPU and Zed crashes when selecting long lines, try setting the `ZED_PATH_SAMPLE_COUNT=0` environment variable. (See [#26143](https://github.com/zed-industries/zed/issues/26143))
 If you're using an AMD GPU, you might get a 'Broken Pipe' error. Try using the RADV or Mesa drivers. (See [#13880](https://github.com/zed-industries/zed/issues/13880))
 
 If you are using Mesa, and want more control over which GPU is selected you can run `MESA_VK_DEVICE_SELECT=list zed --foreground` to get a list of available GPUs and then export `MESA_VK_DEVICE_SELECT=xxxx:yyyy` to choose a specific device.