diff --git a/crates/gpui/src/platform/windows/directx_renderer.rs b/crates/gpui/src/platform/windows/directx_renderer.rs index 542c90444d0edc7d15fa7bfd3c67281ef9618c1b..4c8642e7c236aec6ca8790b29d20c32b18128e18 100644 --- a/crates/gpui/src/platform/windows/directx_renderer.rs +++ b/crates/gpui/src/platform/windows/directx_renderer.rs @@ -59,6 +59,14 @@ struct DirectXGlobalElements { blend_state: ID3D11BlendState, } +#[repr(C)] +struct DrawInstancedIndirectArgs { + vertex_count_per_instance: u32, + instance_count: u32, + start_vertex_location: u32, + start_instance_location: u32, +} + // #[cfg(not(feature = "enable-renderdoc"))] // struct DirectComposition { // comp_device: IDCompositionDevice, @@ -273,12 +281,22 @@ impl DirectXRenderer { } let mut vertices = Vec::new(); let mut sprites = Vec::with_capacity(paths.len()); - + let mut draw_indirect_commands = Vec::with_capacity(paths.len()); + let mut start_vertex_location = 0; for (i, path) in paths.iter().enumerate() { - vertices.extend(path.vertices.iter().map(|v| DirectXPathVertex { + draw_indirect_commands.push(DrawInstancedIndirectArgs { + vertex_count_per_instance: path.vertices.len() as u32, + instance_count: 1, + start_vertex_location, + start_instance_location: i as u32, + }); + start_vertex_location += path.vertices.len() as u32; + + vertices.extend(path.vertices.iter().map(|v| PathVertex { xy_position: v.xy_position, - content_mask: path.content_mask.bounds, - sprite_index: i as u32, + content_mask: ContentMask { + bounds: path.content_mask.bounds, + }, })); sprites.push(PathSprite { @@ -295,7 +313,7 @@ impl DirectXRenderer { .map(|input| update_paths_pipeline_buffer(&mut self.pipelines.paths_pipeline, input)); update_buffer( &self.devices.device_context, - &self.pipelines.paths_pipeline.instance_buffer, + &self.pipelines.paths_pipeline.buffer, &sprites, )?; update_paths_vertex_capacity( @@ -309,40 +327,32 @@ impl DirectXRenderer { &self.pipelines.paths_pipeline.vertex_buffer, &vertices, )?; + 
update_indirect_buffer_capacity( + &self.pipelines.paths_pipeline, + draw_indirect_commands.len(), + &self.devices.device, + ) + .map(|input| update_paths_indirect_buffer(&mut self.pipelines.paths_pipeline, input)); + update_buffer( + &self.devices.device_context, + &self.pipelines.paths_pipeline.indirect_draw_buffer, + &draw_indirect_commands, + )?; + prepare_indirect_draws( + &self.devices.device_context, + &self.pipelines.paths_pipeline, + &self.context.viewport, + &self.globals.global_params_buffer, + D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + )?; - let device_context = &self.devices.device_context; - unsafe { - device_context.IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - device_context.VSSetShader(&self.pipelines.paths_pipeline.vertex, None); - device_context.PSSetShader(&self.pipelines.paths_pipeline.fragment, None); - device_context.VSSetConstantBuffers(0, Some(&self.globals.global_params_buffer)); - device_context.PSSetConstantBuffers(0, Some(&self.globals.global_params_buffer)); - device_context.VSSetShaderResources( - 2, - Some(&[Some(self.pipelines.paths_pipeline.instance_view.clone())]), - ); - device_context.PSSetShaderResources( - 2, - Some(&[Some(self.pipelines.paths_pipeline.instance_view.clone())]), + for i in 0..paths.len() { + draw_indirect( + &self.devices.device_context, + &self.pipelines.paths_pipeline.indirect_draw_buffer, + (i * std::mem::size_of::()) as u32, ); - device_context.PSSetSamplers(0, Some(&self.globals.sampler)); - device_context.OMSetBlendState(&self.globals.blend_state, None, 0xffffffff); - let stride = std::mem::size_of::() as u32; - let offset = 0u32; - device_context.IASetVertexBuffers( - 0, - 1, - Some([Some(self.pipelines.paths_pipeline.vertex_buffer.clone())].as_ptr()), - Some(&stride), - Some(&offset), - ); - device_context.IASetInputLayout(&self.pipelines.paths_pipeline.input_layout); } - - unsafe { - device_context.Draw(vertices.len() as u32, 0); - } - Ok(()) } @@ -616,18 +626,19 @@ struct PipelineState { 
struct PathsPipelineState { vertex: ID3D11VertexShader, fragment: ID3D11PixelShader, - instance_buffer: ID3D11Buffer, - instance_buffer_size: usize, + buffer: ID3D11Buffer, + buffer_size: usize, vertex_buffer: ID3D11Buffer, vertex_buffer_size: usize, - instance_view: ID3D11ShaderResourceView, - vertex_view: ID3D11ShaderResourceView, - input_layout: ID3D11InputLayout, + indirect_draw_buffer: ID3D11Buffer, + indirect_buffer_size: usize, + view: [Option; 1], + vertex_view: [Option; 1], } impl PathsPipelineState { fn new(device: &ID3D11Device) -> Result { - let (vertex, shader_blob) = { + let vertex = { let shader_blob = shader_resources::build_shader_blob("paths_vertex", "vs_5_0")?; let bytes = unsafe { std::slice::from_raw_parts( @@ -635,7 +646,7 @@ impl PathsPipelineState { shader_blob.GetBufferSize(), ) }; - (create_vertex_shader(device, bytes)?, shader_blob) + create_vertex_shader(device, bytes)? }; let fragment = { let shader_blob = shader_resources::build_shader_blob("paths_fragment", "ps_5_0")?; @@ -647,64 +658,23 @@ impl PathsPipelineState { }; create_fragment_shader(device, bytes)? 
}; - let instance_buffer = create_buffer(device, std::mem::size_of::(), 32)?; - let instance_view = create_buffer_view(device, &instance_buffer)?; - let vertex_buffer = create_buffer(device, std::mem::size_of::(), 32)?; + let buffer = create_buffer(device, std::mem::size_of::(), 32)?; + let view = create_buffer_view(device, &buffer)?; + let vertex_buffer = + create_buffer(device, std::mem::size_of::>(), 32)?; let vertex_view = create_buffer_view(device, &vertex_buffer)?; - - // Create input layout - let input_layout = unsafe { - let shader_bytes = std::slice::from_raw_parts( - shader_blob.GetBufferPointer() as *const u8, - shader_blob.GetBufferSize(), - ); - let mut layout = None; - device.CreateInputLayout( - &[ - D3D11_INPUT_ELEMENT_DESC { - SemanticName: windows::core::s!("POSITION"), - SemanticIndex: 0, - Format: DXGI_FORMAT_R32G32_FLOAT, - InputSlot: 0, - AlignedByteOffset: 0, - InputSlotClass: D3D11_INPUT_PER_VERTEX_DATA, - InstanceDataStepRate: 0, - }, - D3D11_INPUT_ELEMENT_DESC { - SemanticName: windows::core::s!("TEXCOORD"), - SemanticIndex: 0, - Format: DXGI_FORMAT_R32G32B32A32_FLOAT, - InputSlot: 0, - AlignedByteOffset: 8, - InputSlotClass: D3D11_INPUT_PER_VERTEX_DATA, - InstanceDataStepRate: 0, - }, - D3D11_INPUT_ELEMENT_DESC { - SemanticName: windows::core::s!("TEXCOORD"), - SemanticIndex: 1, - Format: DXGI_FORMAT_R32_UINT, - InputSlot: 0, - AlignedByteOffset: 24, - InputSlotClass: D3D11_INPUT_PER_VERTEX_DATA, - InstanceDataStepRate: 0, - }, - ], - shader_bytes, - Some(&mut layout), - )?; - layout.unwrap() - }; - + let indirect_draw_buffer = create_indirect_draw_buffer(device, 32)?; Ok(Self { vertex, fragment, - instance_buffer, - instance_buffer_size: 32, + buffer, + buffer_size: 32, vertex_buffer, vertex_buffer_size: 32, - instance_view, + indirect_draw_buffer, + indirect_buffer_size: 32, + view, vertex_view, - input_layout, }) } } @@ -716,14 +686,6 @@ struct PathSprite { color: Background, } -#[derive(Clone, Debug)] -#[repr(C)] -struct 
DirectXPathVertex { - xy_position: Point, - content_mask: Bounds, - sprite_index: u32, -} - fn get_dxgi_factory() -> Result { #[cfg(debug_assertions)] let factory_flag = DXGI_CREATE_FACTORY_DEBUG; @@ -956,7 +918,7 @@ fn create_pipieline( fragment, buffer, buffer_size, - view: [Some(view)], + view, }) } @@ -997,10 +959,24 @@ fn create_buffer( fn create_buffer_view( device: &ID3D11Device, buffer: &ID3D11Buffer, -) -> Result { +) -> Result<[Option; 1]> { let mut view = None; unsafe { device.CreateShaderResourceView(buffer, None, Some(&mut view)) }?; - Ok(view.unwrap()) + Ok([view]) +} + +fn create_indirect_draw_buffer(device: &ID3D11Device, buffer_size: u32) -> Result { + let desc = D3D11_BUFFER_DESC { + ByteWidth: std::mem::size_of::() as u32 * buffer_size, + Usage: D3D11_USAGE_DYNAMIC, + BindFlags: D3D11_BIND_INDEX_BUFFER.0 as u32, + CPUAccessFlags: D3D11_CPU_ACCESS_WRITE.0 as u32, + MiscFlags: D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS.0 as u32, + StructureByteStride: std::mem::size_of::() as u32, + }; + let mut buffer = None; + unsafe { device.CreateBuffer(&desc, None, Some(&mut buffer)) }?; + Ok(buffer.unwrap()) } fn update_global_params( @@ -1055,15 +1031,15 @@ fn update_buffer_capacity( let buffer_size = data_size.next_power_of_two(); let buffer = create_buffer(device, element_size, buffer_size).unwrap(); let view = create_buffer_view(device, &buffer).unwrap(); - Some((buffer, buffer_size, [Some(view)])) + Some((buffer, buffer_size, view)) } fn update_paths_buffer_capacity( pipeline: &PathsPipelineState, data_size: usize, device: &ID3D11Device, -) -> Option<(ID3D11Buffer, usize, ID3D11ShaderResourceView)> { - if pipeline.instance_buffer_size >= data_size { +) -> Option<(ID3D11Buffer, usize, [Option; 1])> { + if pipeline.buffer_size >= data_size { return None; } let buffer_size = data_size.next_power_of_two(); @@ -1076,14 +1052,14 @@ fn update_paths_vertex_capacity( pipeline: &PathsPipelineState, vertex_size: usize, device: &ID3D11Device, -) -> Option<(ID3D11Buffer, 
usize, ID3D11ShaderResourceView)> { +) -> Option<(ID3D11Buffer, usize, [Option; 1])> { if pipeline.vertex_buffer_size >= vertex_size { return None; } let vertex_size = vertex_size.next_power_of_two(); let buffer = create_buffer( device, - std::mem::size_of::(), + std::mem::size_of::>(), vertex_size, ) .unwrap(); @@ -1091,6 +1067,19 @@ fn update_paths_vertex_capacity( Some((buffer, vertex_size, view)) } +fn update_indirect_buffer_capacity( + pipeline: &PathsPipelineState, + data_size: usize, + device: &ID3D11Device, +) -> Option<(ID3D11Buffer, usize)> { + if pipeline.indirect_buffer_size >= data_size { + return None; + } + let buffer_size = data_size.next_power_of_two(); + let buffer = create_indirect_draw_buffer(device, buffer_size as u32).unwrap(); + Some((buffer, buffer_size)) +} + fn update_pipeline( pipeline: &mut PipelineState, input: (ID3D11Buffer, usize, [Option; 1]), @@ -1102,22 +1091,27 @@ fn update_pipeline( fn update_paths_pipeline_buffer( pipeline: &mut PathsPipelineState, - input: (ID3D11Buffer, usize, ID3D11ShaderResourceView), + input: (ID3D11Buffer, usize, [Option; 1]), ) { - pipeline.instance_buffer = input.0; - pipeline.instance_buffer_size = input.1; - pipeline.instance_view = input.2; + pipeline.buffer = input.0; + pipeline.buffer_size = input.1; + pipeline.view = input.2; } fn update_paths_pipeline_vertex( pipeline: &mut PathsPipelineState, - input: (ID3D11Buffer, usize, ID3D11ShaderResourceView), + input: (ID3D11Buffer, usize, [Option; 1]), ) { pipeline.vertex_buffer = input.0; pipeline.vertex_buffer_size = input.1; pipeline.vertex_view = input.2; } +fn update_paths_indirect_buffer(pipeline: &mut PathsPipelineState, input: (ID3D11Buffer, usize)) { + pipeline.indirect_draw_buffer = input.0; + pipeline.indirect_buffer_size = input.1; +} + fn update_buffer( device_context: &ID3D11DeviceContext, buffer: &ID3D11Buffer, @@ -1132,6 +1126,37 @@ fn update_buffer( Ok(()) } +fn prepare_indirect_draws( + device_context: &ID3D11DeviceContext, + pipeline: 
&PathsPipelineState, + viewport: &[D3D11_VIEWPORT], + global_params: &[Option], + topology: D3D_PRIMITIVE_TOPOLOGY, +) -> Result<()> { + unsafe { + device_context.VSSetShaderResources(1, Some(&pipeline.vertex_view)); + device_context.VSSetShaderResources(2, Some(&pipeline.view)); + device_context.PSSetShaderResources(2, Some(&pipeline.view)); + device_context.IASetPrimitiveTopology(topology); + device_context.RSSetViewports(Some(viewport)); + device_context.VSSetShader(&pipeline.vertex, None); + device_context.PSSetShader(&pipeline.fragment, None); + device_context.VSSetConstantBuffers(0, Some(global_params)); + device_context.PSSetConstantBuffers(0, Some(global_params)); + } + Ok(()) +} + +fn draw_indirect( + device_context: &ID3D11DeviceContext, + indirect_draw_buffer: &ID3D11Buffer, + offset: u32, +) { + unsafe { + device_context.DrawInstancedIndirect(indirect_draw_buffer, offset); + } +} + fn draw_normal( device_context: &ID3D11DeviceContext, pipeline: &PipelineState, diff --git a/crates/gpui/src/platform/windows/shaders.hlsl b/crates/gpui/src/platform/windows/shaders.hlsl index 18760dcf78ae66bdefece0a6ae699c3b2231a76a..3438e708a3a38154354120388f1babc3ec8ec020 100644 --- a/crates/gpui/src/platform/windows/shaders.hlsl +++ b/crates/gpui/src/platform/windows/shaders.hlsl @@ -256,7 +256,7 @@ float pick_corner_radius(float2 center_to_point, Corners corner_radii) { } } -float4 to_device_position_transformed(float2 unit_vertex, Bounds bounds, +float4 to_device_position_transformed(float2 unit_vertex, Bounds bounds, TransformationMatrix transformation) { float2 position = unit_vertex * bounds.size + bounds.origin; float2 transformed = mul(position, transformation.rotation_scale) + transformation.translation; @@ -876,10 +876,9 @@ float4 shadow_fragment(ShadowFragmentInput input): SV_TARGET { ** */ -struct PathVertexInput { - float2 xy_position: POSITION; - float4 content_mask: TEXCOORD0; - uint sprite_index: TEXCOORD1; +struct PathVertex { + float2 xy_position; + 
Bounds content_mask; }; struct PathSprite { @@ -904,19 +903,17 @@ struct PathFragmentInput { nointerpolation float4 color1: COLOR2; }; +StructuredBuffer path_vertices: register(t1); StructuredBuffer path_sprites: register(t2); -PathVertexOutput paths_vertex(PathVertexInput input) { - PathSprite sprite = path_sprites[input.sprite_index]; - - Bounds content_mask; - content_mask.origin = input.content_mask.xy; - content_mask.size = input.content_mask.zw; +PathVertexOutput paths_vertex(uint vertex_id: SV_VertexID, uint instance_id: SV_InstanceID) { + PathVertex v = path_vertices[vertex_id]; + PathSprite sprite = path_sprites[instance_id]; PathVertexOutput output; - output.position = to_device_position_impl(input.xy_position); - output.clip_distance = distance_from_clip_rect_impl(input.xy_position, content_mask); - output.sprite_id = input.sprite_index; + output.position = to_device_position_impl(v.xy_position); + output.clip_distance = distance_from_clip_rect_impl(v.xy_position, v.content_mask); + output.sprite_id = instance_id; GradientColor gradient = prepare_gradient_color( sprite.color.tag, @@ -974,7 +971,7 @@ UnderlineVertexOutput underline_vertex(uint vertex_id: SV_VertexID, uint underli float2 unit_vertex = float2(float(vertex_id & 1u), 0.5 * float(vertex_id & 2u)); Underline underline = underlines[underline_id]; float4 device_position = to_device_position(unit_vertex, underline.bounds); - float4 clip_distance = distance_from_clip_rect(unit_vertex, underline.bounds, + float4 clip_distance = distance_from_clip_rect(unit_vertex, underline.bounds, underline.content_mask); float4 color = hsla_to_rgba(underline.color);