Fix path rendering - draw all paths w/ one regular draw call

Max Brunsfeld created

Change summary

crates/gpui/src/platform/windows/directx_renderer.rs | 255 ++++++-------
crates/gpui/src/platform/windows/shaders.hlsl        |  29 
2 files changed, 131 insertions(+), 153 deletions(-)

Detailed changes

crates/gpui/src/platform/windows/directx_renderer.rs 🔗

@@ -60,14 +60,6 @@ struct DirectXGlobalElements {
     blend_state: ID3D11BlendState,
 }
 
-#[repr(C)]
-struct DrawInstancedIndirectArgs {
-    vertex_count_per_instance: u32,
-    instance_count: u32,
-    start_vertex_location: u32,
-    start_instance_location: u32,
-}
-
 // #[cfg(not(feature = "enable-renderdoc"))]
 // struct DirectComposition {
 //     comp_device: IDCompositionDevice,
@@ -282,22 +274,12 @@ impl DirectXRenderer {
         }
         let mut vertices = Vec::new();
         let mut sprites = Vec::with_capacity(paths.len());
-        let mut draw_indirect_commands = Vec::with_capacity(paths.len());
-        let mut start_vertex_location = 0;
-        for (i, path) in paths.iter().enumerate() {
-            draw_indirect_commands.push(DrawInstancedIndirectArgs {
-                vertex_count_per_instance: path.vertices.len() as u32,
-                instance_count: 1,
-                start_vertex_location,
-                start_instance_location: i as u32,
-            });
-            start_vertex_location += path.vertices.len() as u32;
 
-            vertices.extend(path.vertices.iter().map(|v| PathVertex {
+        for (i, path) in paths.iter().enumerate() {
+            vertices.extend(path.vertices.iter().map(|v| DirectXPathVertex {
                 xy_position: v.xy_position,
-                content_mask: ContentMask {
-                    bounds: path.content_mask.bounds,
-                },
+                content_mask: path.content_mask.bounds,
+                sprite_index: i as u32,
             }));
 
             sprites.push(PathSprite {
@@ -314,7 +296,7 @@ impl DirectXRenderer {
         .map(|input| update_paths_pipeline_buffer(&mut self.pipelines.paths_pipeline, input));
         update_buffer(
             &self.devices.device_context,
-            &self.pipelines.paths_pipeline.buffer,
+            &self.pipelines.paths_pipeline.instance_buffer,
             &sprites,
         )?;
         update_paths_vertex_capacity(
@@ -328,32 +310,40 @@ impl DirectXRenderer {
             &self.pipelines.paths_pipeline.vertex_buffer,
             &vertices,
         )?;
-        update_indirect_buffer_capacity(
-            &self.pipelines.paths_pipeline,
-            draw_indirect_commands.len(),
-            &self.devices.device,
-        )
-        .map(|input| update_paths_indirect_buffer(&mut self.pipelines.paths_pipeline, input));
-        update_buffer(
-            &self.devices.device_context,
-            &self.pipelines.paths_pipeline.indirect_draw_buffer,
-            &draw_indirect_commands,
-        )?;
-        prepare_indirect_draws(
-            &self.devices.device_context,
-            &self.pipelines.paths_pipeline,
-            &self.context.viewport,
-            &self.globals.global_params_buffer,
-            D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
-        )?;
 
-        for i in 0..paths.len() {
-            draw_indirect(
-                &self.devices.device_context,
-                &self.pipelines.paths_pipeline.indirect_draw_buffer,
-                (i * std::mem::size_of::<DrawInstancedIndirectArgs>()) as u32,
+        let device_context = &self.devices.device_context;
+        unsafe {
+            device_context.IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+            device_context.VSSetShader(&self.pipelines.paths_pipeline.vertex, None);
+            device_context.PSSetShader(&self.pipelines.paths_pipeline.fragment, None);
+            device_context.VSSetConstantBuffers(0, Some(&self.globals.global_params_buffer));
+            device_context.PSSetConstantBuffers(0, Some(&self.globals.global_params_buffer));
+            device_context.VSSetShaderResources(
+                2,
+                Some(&[Some(self.pipelines.paths_pipeline.instance_view.clone())]),
+            );
+            device_context.PSSetShaderResources(
+                2,
+                Some(&[Some(self.pipelines.paths_pipeline.instance_view.clone())]),
             );
+            device_context.PSSetSamplers(0, Some(&self.globals.sampler));
+            device_context.OMSetBlendState(&self.globals.blend_state, None, 0xffffffff);
+            let stride = std::mem::size_of::<DirectXPathVertex>() as u32;
+            let offset = 0u32;
+            device_context.IASetVertexBuffers(
+                0,
+                1,
+                Some([Some(self.pipelines.paths_pipeline.vertex_buffer.clone())].as_ptr()),
+                Some(&stride),
+                Some(&offset),
+            );
+            device_context.IASetInputLayout(&self.pipelines.paths_pipeline.input_layout);
         }
+
+        unsafe {
+            device_context.Draw(vertices.len() as u32, 0);
+        }
+
         Ok(())
     }
 
@@ -627,19 +617,18 @@ struct PipelineState {
 struct PathsPipelineState {
     vertex: ID3D11VertexShader,
     fragment: ID3D11PixelShader,
-    buffer: ID3D11Buffer,
-    buffer_size: usize,
+    instance_buffer: ID3D11Buffer,
+    instance_buffer_size: usize,
     vertex_buffer: ID3D11Buffer,
     vertex_buffer_size: usize,
-    indirect_draw_buffer: ID3D11Buffer,
-    indirect_buffer_size: usize,
-    view: [Option<ID3D11ShaderResourceView>; 1],
-    vertex_view: [Option<ID3D11ShaderResourceView>; 1],
+    instance_view: ID3D11ShaderResourceView,
+    vertex_view: ID3D11ShaderResourceView,
+    input_layout: ID3D11InputLayout,
 }
 
 impl PathsPipelineState {
     fn new(device: &ID3D11Device) -> Result<Self> {
-        let vertex = {
+        let (vertex, shader_blob) = {
             let shader_blob = shader_resources::build_shader_blob("paths_vertex", "vs_5_0")?;
             let bytes = unsafe {
                 std::slice::from_raw_parts(
@@ -647,7 +636,7 @@ impl PathsPipelineState {
                     shader_blob.GetBufferSize(),
                 )
             };
-            create_vertex_shader(device, bytes)?
+            (create_vertex_shader(device, bytes)?, shader_blob)
         };
         let fragment = {
             let shader_blob = shader_resources::build_shader_blob("paths_fragment", "ps_5_0")?;
@@ -659,23 +648,64 @@ impl PathsPipelineState {
             };
             create_fragment_shader(device, bytes)?
         };
-        let buffer = create_buffer(device, std::mem::size_of::<PathSprite>(), 32)?;
-        let view = create_buffer_view(device, &buffer)?;
-        let vertex_buffer =
-            create_buffer(device, std::mem::size_of::<PathVertex<ScaledPixels>>(), 32)?;
+        let instance_buffer = create_buffer(device, std::mem::size_of::<PathSprite>(), 32)?;
+        let instance_view = create_buffer_view(device, &instance_buffer)?;
+        let vertex_buffer = create_buffer(device, std::mem::size_of::<DirectXPathVertex>(), 32)?;
         let vertex_view = create_buffer_view(device, &vertex_buffer)?;
-        let indirect_draw_buffer = create_indirect_draw_buffer(device, 32)?;
+
+        // Create input layout
+        let input_layout = unsafe {
+            let shader_bytes = std::slice::from_raw_parts(
+                shader_blob.GetBufferPointer() as *const u8,
+                shader_blob.GetBufferSize(),
+            );
+            let mut layout = None;
+            device.CreateInputLayout(
+                &[
+                    D3D11_INPUT_ELEMENT_DESC {
+                        SemanticName: windows::core::s!("POSITION"),
+                        SemanticIndex: 0,
+                        Format: DXGI_FORMAT_R32G32_FLOAT,
+                        InputSlot: 0,
+                        AlignedByteOffset: 0,
+                        InputSlotClass: D3D11_INPUT_PER_VERTEX_DATA,
+                        InstanceDataStepRate: 0,
+                    },
+                    D3D11_INPUT_ELEMENT_DESC {
+                        SemanticName: windows::core::s!("TEXCOORD"),
+                        SemanticIndex: 0,
+                        Format: DXGI_FORMAT_R32G32B32A32_FLOAT,
+                        InputSlot: 0,
+                        AlignedByteOffset: 8,
+                        InputSlotClass: D3D11_INPUT_PER_VERTEX_DATA,
+                        InstanceDataStepRate: 0,
+                    },
+                    D3D11_INPUT_ELEMENT_DESC {
+                        SemanticName: windows::core::s!("TEXCOORD"),
+                        SemanticIndex: 1,
+                        Format: DXGI_FORMAT_R32_UINT,
+                        InputSlot: 0,
+                        AlignedByteOffset: 24,
+                        InputSlotClass: D3D11_INPUT_PER_VERTEX_DATA,
+                        InstanceDataStepRate: 0,
+                    },
+                ],
+                shader_bytes,
+                Some(&mut layout),
+            )?;
+            layout.unwrap()
+        };
+
         Ok(Self {
             vertex,
             fragment,
-            buffer,
-            buffer_size: 32,
+            instance_buffer,
+            instance_buffer_size: 32,
             vertex_buffer,
             vertex_buffer_size: 32,
-            indirect_draw_buffer,
-            indirect_buffer_size: 32,
-            view,
+            instance_view,
             vertex_view,
+            input_layout,
         })
     }
 }
@@ -687,6 +717,14 @@ struct PathSprite {
     color: Background,
 }
 
+#[derive(Clone, Debug)]
+#[repr(C)]
+struct DirectXPathVertex {
+    xy_position: Point<ScaledPixels>,
+    content_mask: Bounds<ScaledPixels>,
+    sprite_index: u32,
+}
+
 fn get_dxgi_factory() -> Result<IDXGIFactory6> {
     #[cfg(debug_assertions)]
     let factory_flag = DXGI_CREATE_FACTORY_DEBUG;
@@ -919,7 +957,7 @@ fn create_pipieline(
         fragment,
         buffer,
         buffer_size,
-        view,
+        view: [Some(view)],
     })
 }
 
@@ -960,24 +998,10 @@ fn create_buffer(
 fn create_buffer_view(
     device: &ID3D11Device,
     buffer: &ID3D11Buffer,
-) -> Result<[Option<ID3D11ShaderResourceView>; 1]> {
+) -> Result<ID3D11ShaderResourceView> {
     let mut view = None;
     unsafe { device.CreateShaderResourceView(buffer, None, Some(&mut view)) }?;
-    Ok([view])
-}
-
-fn create_indirect_draw_buffer(device: &ID3D11Device, buffer_size: u32) -> Result<ID3D11Buffer> {
-    let desc = D3D11_BUFFER_DESC {
-        ByteWidth: std::mem::size_of::<DrawInstancedIndirectArgs>() as u32 * buffer_size,
-        Usage: D3D11_USAGE_DYNAMIC,
-        BindFlags: D3D11_BIND_INDEX_BUFFER.0 as u32,
-        CPUAccessFlags: D3D11_CPU_ACCESS_WRITE.0 as u32,
-        MiscFlags: D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS.0 as u32,
-        StructureByteStride: std::mem::size_of::<DrawInstancedIndirectArgs>() as u32,
-    };
-    let mut buffer = None;
-    unsafe { device.CreateBuffer(&desc, None, Some(&mut buffer)) }?;
-    Ok(buffer.unwrap())
+    Ok(view.unwrap())
 }
 
 fn update_global_params(
@@ -1032,15 +1056,15 @@ fn update_buffer_capacity(
     let buffer_size = data_size.next_power_of_two();
     let buffer = create_buffer(device, element_size, buffer_size).unwrap();
     let view = create_buffer_view(device, &buffer).unwrap();
-    Some((buffer, buffer_size, view))
+    Some((buffer, buffer_size, [Some(view)]))
 }
 
 fn update_paths_buffer_capacity(
     pipeline: &PathsPipelineState,
     data_size: usize,
     device: &ID3D11Device,
-) -> Option<(ID3D11Buffer, usize, [Option<ID3D11ShaderResourceView>; 1])> {
-    if pipeline.buffer_size >= data_size {
+) -> Option<(ID3D11Buffer, usize, ID3D11ShaderResourceView)> {
+    if pipeline.instance_buffer_size >= data_size {
         return None;
     }
     let buffer_size = data_size.next_power_of_two();
@@ -1053,14 +1077,14 @@ fn update_paths_vertex_capacity(
     pipeline: &PathsPipelineState,
     vertex_size: usize,
     device: &ID3D11Device,
-) -> Option<(ID3D11Buffer, usize, [Option<ID3D11ShaderResourceView>; 1])> {
+) -> Option<(ID3D11Buffer, usize, ID3D11ShaderResourceView)> {
     if pipeline.vertex_buffer_size >= vertex_size {
         return None;
     }
     let vertex_size = vertex_size.next_power_of_two();
     let buffer = create_buffer(
         device,
-        std::mem::size_of::<PathVertex<ScaledPixels>>(),
+        std::mem::size_of::<DirectXPathVertex>(),
         vertex_size,
     )
     .unwrap();
@@ -1068,19 +1092,6 @@ fn update_paths_vertex_capacity(
     Some((buffer, vertex_size, view))
 }
 
-fn update_indirect_buffer_capacity(
-    pipeline: &PathsPipelineState,
-    data_size: usize,
-    device: &ID3D11Device,
-) -> Option<(ID3D11Buffer, usize)> {
-    if pipeline.indirect_buffer_size >= data_size {
-        return None;
-    }
-    let buffer_size = data_size.next_power_of_two();
-    let buffer = create_indirect_draw_buffer(device, data_size as u32).unwrap();
-    Some((buffer, buffer_size))
-}
-
 fn update_pipeline(
     pipeline: &mut PipelineState,
     input: (ID3D11Buffer, usize, [Option<ID3D11ShaderResourceView>; 1]),
@@ -1092,27 +1103,22 @@ fn update_pipeline(
 
 fn update_paths_pipeline_buffer(
     pipeline: &mut PathsPipelineState,
-    input: (ID3D11Buffer, usize, [Option<ID3D11ShaderResourceView>; 1]),
+    input: (ID3D11Buffer, usize, ID3D11ShaderResourceView),
 ) {
-    pipeline.buffer = input.0;
-    pipeline.buffer_size = input.1;
-    pipeline.view = input.2;
+    pipeline.instance_buffer = input.0;
+    pipeline.instance_buffer_size = input.1;
+    pipeline.instance_view = input.2;
 }
 
 fn update_paths_pipeline_vertex(
     pipeline: &mut PathsPipelineState,
-    input: (ID3D11Buffer, usize, [Option<ID3D11ShaderResourceView>; 1]),
+    input: (ID3D11Buffer, usize, ID3D11ShaderResourceView),
 ) {
     pipeline.vertex_buffer = input.0;
     pipeline.vertex_buffer_size = input.1;
     pipeline.vertex_view = input.2;
 }
 
-fn update_paths_indirect_buffer(pipeline: &mut PathsPipelineState, input: (ID3D11Buffer, usize)) {
-    pipeline.indirect_draw_buffer = input.0;
-    pipeline.indirect_buffer_size = input.1;
-}
-
 fn update_buffer<T>(
     device_context: &ID3D11DeviceContext,
     buffer: &ID3D11Buffer,
@@ -1127,37 +1133,6 @@ fn update_buffer<T>(
     Ok(())
 }
 
-fn prepare_indirect_draws(
-    device_context: &ID3D11DeviceContext,
-    pipeline: &PathsPipelineState,
-    viewport: &[D3D11_VIEWPORT],
-    global_params: &[Option<ID3D11Buffer>],
-    topology: D3D_PRIMITIVE_TOPOLOGY,
-) -> Result<()> {
-    unsafe {
-        device_context.VSSetShaderResources(1, Some(&pipeline.vertex_view));
-        device_context.VSSetShaderResources(2, Some(&pipeline.view));
-        device_context.PSSetShaderResources(2, Some(&pipeline.view));
-        device_context.IASetPrimitiveTopology(topology);
-        device_context.RSSetViewports(Some(viewport));
-        device_context.VSSetShader(&pipeline.vertex, None);
-        device_context.PSSetShader(&pipeline.fragment, None);
-        device_context.VSSetConstantBuffers(0, Some(global_params));
-        device_context.PSSetConstantBuffers(0, Some(global_params));
-    }
-    Ok(())
-}
-
-fn draw_indirect(
-    device_context: &ID3D11DeviceContext,
-    indirect_draw_buffer: &ID3D11Buffer,
-    offset: u32,
-) {
-    unsafe {
-        device_context.DrawInstancedIndirect(indirect_draw_buffer, offset);
-    }
-}
-
 fn draw_normal(
     device_context: &ID3D11DeviceContext,
     pipeline: &PipelineState,

crates/gpui/src/platform/windows/shaders.hlsl 🔗

@@ -256,7 +256,7 @@ float pick_corner_radius(float2 center_to_point, Corners corner_radii) {
     }
 }
 
-float4 to_device_position_transformed(float2 unit_vertex, Bounds bounds, 
+float4 to_device_position_transformed(float2 unit_vertex, Bounds bounds,
                                       TransformationMatrix transformation) {
     float2 position = unit_vertex * bounds.size + bounds.origin;
     float2 transformed = mul(position, transformation.rotation_scale) + transformation.translation;
@@ -876,9 +876,10 @@ float4 shadow_fragment(ShadowFragmentInput input): SV_TARGET {
 **
 */
 
-struct PathVertex {
-    float2 xy_position;
-    Bounds content_mask;
+struct PathVertexInput {
+    float2 xy_position: POSITION;
+    float4 content_mask: TEXCOORD0;
+    uint sprite_index: TEXCOORD1;
 };
 
 struct PathSprite {
@@ -895,17 +896,19 @@ struct PathVertexOutput {
     nointerpolation float4 color1: COLOR2;
 };
 
-StructuredBuffer<PathVertex> path_vertices: register(t1);
 StructuredBuffer<PathSprite> path_sprites: register(t2);
 
-PathVertexOutput paths_vertex(uint vertex_id: SV_VertexID, uint instance_id: SV_InstanceID) {
-    PathVertex v = path_vertices[vertex_id];
-    PathSprite sprite = path_sprites[instance_id];
+PathVertexOutput paths_vertex(PathVertexInput input) {
+    PathSprite sprite = path_sprites[input.sprite_index];
+
+    Bounds content_mask;
+    content_mask.origin = input.content_mask.xy;
+    content_mask.size = input.content_mask.zw;
 
     PathVertexOutput output;
-    output.position = to_device_position_impl(v.xy_position);
-    output.clip_distance = distance_from_clip_rect_impl(v.xy_position, v.content_mask);
-    output.sprite_id = instance_id;
+    output.position = to_device_position_impl(input.xy_position);
+    output.clip_distance = distance_from_clip_rect_impl(input.xy_position, content_mask);
+    output.sprite_id = input.sprite_index;
 
     GradientColor gradient = prepare_gradient_color(
         sprite.color.tag,
@@ -925,7 +928,7 @@ float4 paths_fragment(PathVertexOutput input): SV_Target {
     if (any(input.clip_distance < zero)) {
         return zero;
     }
-    
+
     PathSprite sprite = path_sprites[input.sprite_id];
     Background background = sprite.color;
     float4 color = gradient_color(background, input.position.xy, sprite.bounds,
@@ -968,7 +971,7 @@ UnderlineVertexOutput underline_vertex(uint vertex_id: SV_VertexID, uint underli
     float2 unit_vertex = float2(float(vertex_id & 1u), 0.5 * float(vertex_id & 2u));
     Underline underline = underlines[underline_id];
     float4 device_position = to_device_position(unit_vertex, underline.bounds);
-    float4 clip_distance = distance_from_clip_rect(unit_vertex, underline.bounds, 
+    float4 clip_distance = distance_from_clip_rect(unit_vertex, underline.bounds,
                                                     underline.content_mask);
     float4 color = hsla_to_rgba(underline.color);