Start on rendering shadows

Antonio Scandurra and Nathan Sobo created

This still doesn't work properly because shadows are rendered beneath
quads and we still don't have a layering mechanism.

Co-Authored-By: Nathan Sobo <nathan@zed.dev>

Change summary

gpui/build.rs                               |  4 
gpui/src/elements/container.rs              | 10 ++
gpui/src/platform/mac/renderer.rs           | 81 +++++++++++++++++++--
gpui/src/platform/mac/shaders/shaders.h     | 18 ++++
gpui/src/platform/mac/shaders/shaders.metal | 84 ++++++++++++++++++++++
gpui/src/scene.rs                           | 20 +++++
6 files changed, 201 insertions(+), 16 deletions(-)

Detailed changes

gpui/build.rs 🔗

@@ -84,9 +84,11 @@ fn compile_metal_shaders() {
 fn generate_shader_bindings() {
     let bindings = bindgen::Builder::default()
         .header(SHADER_HEADER_PATH)
+        .whitelist_type("GPUIUniforms")
         .whitelist_type("GPUIQuadInputIndex")
         .whitelist_type("GPUIQuad")
-        .whitelist_type("GPUIQuadUniforms")
+        .whitelist_type("GPUIShadowInputIndex")
+        .whitelist_type("GPUIShadow")
         .parse_callbacks(Box::new(bindgen::CargoCallbacks))
         .generate()
         .expect("unable to generate bindings");

gpui/src/elements/container.rs 🔗

@@ -3,7 +3,7 @@ use pathfinder_geometry::rect::RectF;
 use crate::{
     color::ColorU,
     geometry::vector::{vec2f, Vector2F},
-    scene::{Border, Quad},
+    scene::{self, Border, Quad},
     AfterLayoutContext, Element, ElementBox, Event, EventContext, LayoutContext, PaintContext,
     SizeConstraint,
 };
@@ -150,6 +150,14 @@ impl Element for Container {
         _: &mut Self::LayoutState,
         ctx: &mut PaintContext,
     ) -> Self::PaintState {
+        if let Some(shadow) = self.shadow.as_ref() {
+            ctx.scene.push_shadow(scene::Shadow {
+                bounds: bounds + shadow.offset,
+                corner_radius: self.corner_radius,
+                sigma: shadow.blur,
+                color: shadow.color,
+            });
+        }
         ctx.scene.push_quad(Quad {
             bounds,
             background: self.background_color,

gpui/src/platform/mac/renderer.rs 🔗

@@ -14,7 +14,8 @@ const INSTANCE_BUFFER_SIZE: u64 = 1024 * 1024;
 
 pub struct Renderer {
     quad_pipeline_state: metal::RenderPipelineState,
-    quad_vertices: metal::Buffer,
+    shadow_pipeline_state: metal::RenderPipelineState,
+    unit_vertices: metal::Buffer,
     instances: metal::Buffer,
 }
 
@@ -24,7 +25,7 @@ impl Renderer {
             .new_library_with_data(SHADERS_METALLIB)
             .map_err(|message| anyhow!("error building metal library: {}", message))?;
 
-        let quad_vertices = [
+        let unit_vertices = [
             (0., 0.).to_float2(),
             (1., 0.).to_float2(),
             (0., 1.).to_float2(),
@@ -32,9 +33,9 @@ impl Renderer {
             (1., 0.).to_float2(),
             (1., 1.).to_float2(),
         ];
-        let quad_vertices = device.new_buffer_with_data(
-            quad_vertices.as_ptr() as *const c_void,
-            (quad_vertices.len() * mem::size_of::<shaders::vector_float2>()) as u64,
+        let unit_vertices = device.new_buffer_with_data(
+            unit_vertices.as_ptr() as *const c_void,
+            (unit_vertices.len() * mem::size_of::<shaders::vector_float2>()) as u64,
             MTLResourceOptions::StorageModeManaged,
         );
         let instances =
@@ -49,7 +50,15 @@ impl Renderer {
                 "quad_fragment",
                 pixel_format,
             )?,
-            quad_vertices,
+            shadow_pipeline_state: build_pipeline_state(
+                device,
+                &library,
+                "shadow",
+                "shadow_vertex",
+                "shadow_fragment",
+                pixel_format,
+            )?,
+            unit_vertices,
             instances,
         })
     }
@@ -65,16 +74,70 @@ impl Renderer {
         });
 
         for layer in scene.layers() {
+            self.render_shadows(scene, layer, ctx);
             self.render_quads(scene, layer, ctx);
         }
     }
 
+    fn render_shadows(&mut self, scene: &Scene, layer: &Layer, ctx: &RenderContext) {
+        ctx.command_encoder
+            .set_render_pipeline_state(&self.shadow_pipeline_state);
+        ctx.command_encoder.set_vertex_buffer(
+            shaders::GPUIShadowInputIndex_GPUIShadowInputIndexVertices as u64,
+            Some(&self.unit_vertices),
+            0,
+        );
+        ctx.command_encoder.set_vertex_buffer(
+            shaders::GPUIShadowInputIndex_GPUIShadowInputIndexShadows as u64,
+            Some(&self.instances),
+            0,
+        );
+        ctx.command_encoder.set_vertex_bytes(
+            shaders::GPUIShadowInputIndex_GPUIShadowInputIndexUniforms as u64,
+            mem::size_of::<shaders::GPUIUniforms>() as u64,
+            [shaders::GPUIUniforms {
+                viewport_size: ctx.drawable_size.to_float2(),
+            }]
+            .as_ptr() as *const c_void,
+        );
+
+        let batch_size = self.instances.length() as usize / mem::size_of::<shaders::GPUIShadow>();
+
+        let buffer_contents = self.instances.contents() as *mut shaders::GPUIShadow;
+        for shadow_batch in layer.shadows().chunks(batch_size) {
+            for (ix, shadow) in shadow_batch.iter().enumerate() {
+                let shape_bounds = shadow.bounds * scene.scale_factor();
+                let shader_shadow = shaders::GPUIShadow {
+                    origin: shape_bounds.origin().to_float2(),
+                    size: shape_bounds.size().to_float2(),
+                    corner_radius: shadow.corner_radius,
+                    sigma: shadow.sigma,
+                    color: shadow.color.to_uchar4(),
+                };
+                unsafe {
+                    *(buffer_contents.offset(ix as isize)) = shader_shadow;
+                }
+            }
+            self.instances.did_modify_range(NSRange {
+                location: 0,
+                length: (shadow_batch.len() * mem::size_of::<shaders::GPUIShadow>()) as u64,
+            });
+
+            ctx.command_encoder.draw_primitives_instanced(
+                metal::MTLPrimitiveType::Triangle,
+                0,
+                6,
+                shadow_batch.len() as u64,
+            );
+        }
+    }
+
     fn render_quads(&mut self, scene: &Scene, layer: &Layer, ctx: &RenderContext) {
         ctx.command_encoder
             .set_render_pipeline_state(&self.quad_pipeline_state);
         ctx.command_encoder.set_vertex_buffer(
             shaders::GPUIQuadInputIndex_GPUIQuadInputIndexVertices as u64,
-            Some(&self.quad_vertices),
+            Some(&self.unit_vertices),
             0,
         );
         ctx.command_encoder.set_vertex_buffer(
@@ -84,8 +147,8 @@ impl Renderer {
         );
         ctx.command_encoder.set_vertex_bytes(
             shaders::GPUIQuadInputIndex_GPUIQuadInputIndexUniforms as u64,
-            mem::size_of::<shaders::GPUIQuadUniforms>() as u64,
-            [shaders::GPUIQuadUniforms {
+            mem::size_of::<shaders::GPUIUniforms>() as u64,
+            [shaders::GPUIUniforms {
                 viewport_size: ctx.drawable_size.to_float2(),
             }]
             .as_ptr() as *const c_void,

gpui/src/platform/mac/shaders/shaders.h 🔗

@@ -1,5 +1,9 @@
 #include <simd/simd.h>
 
+typedef struct {
+    vector_float2 viewport_size;
+} GPUIUniforms;
+
 typedef enum {
     GPUIQuadInputIndexVertices = 0,
     GPUIQuadInputIndexQuads = 1,
@@ -18,6 +22,16 @@ typedef struct {
     float corner_radius;
 } GPUIQuad;
 
+typedef enum {
+    GPUIShadowInputIndexVertices = 0,
+    GPUIShadowInputIndexShadows = 1,
+    GPUIShadowInputIndexUniforms = 2,
+} GPUIShadowInputIndex;
+
 typedef struct {
-    vector_float2 viewport_size;
-} GPUIQuadUniforms;
+    vector_float2 origin;
+    vector_float2 size;
+    float corner_radius;
+    float sigma;
+    vector_uchar4 color;
+} GPUIShadow;

gpui/src/platform/mac/shaders/shaders.metal 🔗

@@ -7,6 +7,31 @@ float4 coloru_to_colorf(uchar4 coloru) {
     return float4(coloru) / float4(0xff, 0xff, 0xff, 0xff);
 }
 
+float4 to_device_position(float2 pixel_position, float2 viewport_size) {
+    return float4(pixel_position / viewport_size * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+}
+
+// A standard gaussian function, used for weighting samples
+float gaussian(float x, float sigma) {
+    return exp(-(x * x) / (2.0 * sigma * sigma)) / (sqrt(2.0 * M_PI_F) * sigma);
+}
+
+// This approximates the error function, needed for the gaussian integral
+float2 erf(float2 x) {
+    float2 s = sign(x);
+    float2 a = abs(x);
+    x = 1.0 + (0.278393 + (0.230389 + 0.078108 * (a * a)) * a) * a;
+    x *= x;
+    return s - s / (x * x);
+}
+
+float blur_along_x(float x, float y, float sigma, float corner, float2 halfSize) {
+    float delta = min(halfSize.y - corner - abs(y), 0.0);
+    float curved = halfSize.x - corner + sqrt(max(0.0, corner * corner - delta * delta));
+    float2 integral = 0.5 + 0.5 * erf((x + float2(-curved, curved)) * (sqrt(0.5) / sigma));
+    return integral.y - integral.x;
+}
+
 struct QuadFragmentInput {
     float4 position [[position]];
     GPUIQuad quad;
@@ -17,12 +42,12 @@ vertex QuadFragmentInput quad_vertex(
     uint quad_id [[instance_id]],
     constant float2 *unit_vertices [[buffer(GPUIQuadInputIndexVertices)]],
     constant GPUIQuad *quads [[buffer(GPUIQuadInputIndexQuads)]],
-    constant GPUIQuadUniforms *uniforms [[buffer(GPUIQuadInputIndexUniforms)]]
+    constant GPUIUniforms *uniforms [[buffer(GPUIQuadInputIndexUniforms)]]
 ) {
     float2 unit_vertex = unit_vertices[unit_vertex_id];
     GPUIQuad quad = quads[quad_id];
     float2 position = unit_vertex * quad.size + quad.origin;
-    float4 device_position = float4(position / uniforms->viewport_size * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+    float4 device_position = to_device_position(position, uniforms->viewport_size);
 
     return QuadFragmentInput {
         device_position,
@@ -32,7 +57,7 @@ vertex QuadFragmentInput quad_vertex(
 
 fragment float4 quad_fragment(
     QuadFragmentInput input [[stage_in]],
-    constant GPUIQuadUniforms *uniforms [[buffer(GPUIQuadInputIndexUniforms)]]
+    constant GPUIUniforms *uniforms [[buffer(GPUIQuadInputIndexUniforms)]]
 ) {
     float2 half_size = input.quad.size / 2.;
     float2 center = input.quad.origin + half_size;
@@ -57,3 +82,56 @@ fragment float4 quad_fragment(
     float4 coverage = float4(1.0, 1.0, 1.0, saturate(0.5 - distance));
     return coverage * color;
 }
+
+struct ShadowFragmentInput {
+    float4 position [[position]];
+    GPUIShadow shadow;
+};
+
+vertex ShadowFragmentInput shadow_vertex(
+    uint unit_vertex_id [[vertex_id]],
+    uint shadow_id [[instance_id]],
+    constant float2 *unit_vertices [[buffer(GPUIShadowInputIndexVertices)]],
+    constant GPUIShadow *shadows [[buffer(GPUIShadowInputIndexShadows)]],
+    constant GPUIUniforms *uniforms [[buffer(GPUIShadowInputIndexUniforms)]]
+) {
+    float2 unit_vertex = unit_vertices[unit_vertex_id];
+    GPUIShadow shadow = shadows[shadow_id];
+
+    float margin = 3. * shadow.sigma;
+    float2 position = unit_vertex * (shadow.size + 2.0 * margin) + shadow.origin - margin;
+    float4 device_position = to_device_position(position, uniforms->viewport_size);
+
+    return ShadowFragmentInput {
+        device_position,
+        shadow,
+    };
+}
+
+fragment float4 shadow_fragment(
+    ShadowFragmentInput input [[stage_in]],
+    constant GPUIUniforms *uniforms [[buffer(GPUIShadowInputIndexUniforms)]]
+) {
+    float sigma = input.shadow.sigma;
+    float corner_radius = input.shadow.corner_radius;
+    float2 half_size = input.shadow.size / 2.;
+    float2 center = input.shadow.origin + half_size;
+    float2 point = input.position.xy - center;
+
+    // The signal is only non-zero in a limited range, so don't waste samples
+    float low = point.y - half_size.y;
+    float high = point.y + half_size.y;
+    float start = clamp(-3. * sigma, low, high);
+    float end = clamp(3. * sigma, low, high);
+
+    // Accumulate samples (we can get away with surprisingly few samples)
+    float step = (end - start) / 4.;
+    float y = start + step * 0.5;
+    float alpha = 0.0;
+    for (int i = 0; i < 4; i++) {
+        alpha += blur_along_x(point.x, point.y - y, sigma, corner_radius, half_size) * gaussian(y, sigma) * step;
+        y += step;
+    }
+
+    return float4(1., 1., 1., alpha) * coloru_to_colorf(input.shadow.color);
+}

gpui/src/scene.rs 🔗

@@ -11,6 +11,7 @@ pub struct Scene {
 pub struct Layer {
     clip_bounds: Option<RectF>,
     quads: Vec<Quad>,
+    shadows: Vec<Shadow>,
 }
 
 #[derive(Default, Debug)]
@@ -21,6 +22,13 @@ pub struct Quad {
     pub corner_radius: f32,
 }
 
+pub struct Shadow {
+    pub bounds: RectF,
+    pub corner_radius: f32,
+    pub sigma: f32,
+    pub color: ColorU,
+}
+
 #[derive(Clone, Copy, Default, Debug)]
 pub struct Border {
     pub width: f32,
@@ -61,6 +69,10 @@ impl Scene {
         self.active_layer().push_quad(quad)
     }
 
+    pub fn push_shadow(&mut self, shadow: Shadow) {
+        self.active_layer().push_shadow(shadow)
+    }
+
     fn active_layer(&mut self) -> &mut Layer {
         &mut self.layers[*self.active_layer_stack.last().unwrap()]
     }
@@ -74,6 +86,14 @@ impl Layer {
     pub fn quads(&self) -> &[Quad] {
         self.quads.as_slice()
     }
+
+    fn push_shadow(&mut self, shadow: Shadow) {
+        self.shadows.push(shadow);
+    }
+
+    pub fn shadows(&self) -> &[Shadow] {
+        self.shadows.as_slice()
+    }
 }
 
 impl Border {