From 4b72838aa9b2a05d011a96774781ca48bcb0751a Mon Sep 17 00:00:00 2001 From: Elabajaba Date: Mon, 5 Feb 2024 17:12:22 -0500 Subject: [PATCH] sort by pipeline then mesh for non transparent passes for massively better batching (#11671) # Objective Bevy issues a ridiculous number of draw calls, and our batching isn't very effective because we sort by distance and only batch when several instances of the same object happen to be adjacent in the sorted order. Sorting by distance can give us slightly better GPU performance when not using the depth prepass (due to less overdraw), but we end up massively CPU-bottlenecked by thousands of unnecessary draw calls. ## Solution Change the sort functions to sort by pipeline id and then by mesh asset id, which gives large performance gains in scenes that are more realistic than our stress tests. Phase items changed: - Opaque3d - Opaque3dDeferred - Opaque3dPrepass ![image](https://github.com/bevyengine/bevy/assets/177631/8c355256-ad86-4b47-81a0-f3906797fe7e) --- ## Changelog - The drawing order of Opaque3d (and of Opaque3dDeferred and Opaque3dPrepass) is now sorted by pipeline and mesh, rather than by distance. This trades a bit of GPU time for massively better batching in scenes that aren't only drawing huge numbers of a single object. 
--- .../src/core_3d/main_opaque_pass_3d_node.rs | 2 +- crates/bevy_core_pipeline/src/core_3d/mod.rs | 13 +++++++------ crates/bevy_core_pipeline/src/deferred/mod.rs | 13 +++++++------ crates/bevy_core_pipeline/src/prepass/mod.rs | 13 +++++++------ crates/bevy_pbr/src/material.rs | 14 ++++++++++---- crates/bevy_pbr/src/prepass/mod.rs | 10 +++++----- 6 files changed, 37 insertions(+), 28 deletions(-) diff --git a/crates/bevy_core_pipeline/src/core_3d/main_opaque_pass_3d_node.rs b/crates/bevy_core_pipeline/src/core_3d/main_opaque_pass_3d_node.rs index 804f6afcf8e18c..856040e3cf442a 100644 --- a/crates/bevy_core_pipeline/src/core_3d/main_opaque_pass_3d_node.rs +++ b/crates/bevy_core_pipeline/src/core_3d/main_opaque_pass_3d_node.rs @@ -47,7 +47,7 @@ impl ViewNode for MainOpaquePass3dNode { ): QueryItem, world: &World, ) -> Result<(), NodeRunError> { - // Run the opaque pass, sorted front-to-back + // Run the opaque pass, sorted by pipeline key and mesh id to greatly improve batching. // NOTE: Scoped to drop the mutable borrow of render_context #[cfg(feature = "trace")] let _main_opaque_pass_3d_span = info_span!("main_opaque_pass_3d").entered(); diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs index 9e542d6c3240a5..eaeedac2c31c53 100644 --- a/crates/bevy_core_pipeline/src/core_3d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs @@ -40,6 +40,7 @@ pub const CORE_3D_DEPTH_FORMAT: TextureFormat = TextureFormat::Depth32Float; use std::{cmp::Reverse, ops::Range}; +use bevy_asset::AssetId; pub use camera_3d::*; pub use main_opaque_pass_3d_node::*; pub use main_transparent_pass_3d_node::*; @@ -50,6 +51,7 @@ use bevy_render::{ camera::{Camera, ExtractedCamera}, color::Color, extract_component::ExtractComponentPlugin, + mesh::Mesh, prelude::Msaa, render_graph::{EmptyNode, RenderGraphApp, ViewNodeRunner}, render_phase::{ @@ -182,7 +184,7 @@ impl Plugin for Core3dPlugin { } pub struct Opaque3d { - pub distance: f32, + 
pub asset_id: AssetId, pub pipeline: CachedRenderPipelineId, pub entity: Entity, pub draw_function: DrawFunctionId, @@ -191,8 +193,7 @@ pub struct Opaque3d { } impl PhaseItem for Opaque3d { - // NOTE: Values increase towards the camera. Front-to-back ordering for opaque means we need a descending sort. - type SortKey = Reverse; + type SortKey = (usize, AssetId); #[inline] fn entity(&self) -> Entity { @@ -201,7 +202,8 @@ impl PhaseItem for Opaque3d { #[inline] fn sort_key(&self) -> Self::SortKey { - Reverse(FloatOrd(self.distance)) + // Sort by pipeline, then by mesh to massively decrease drawcall counts in real scenes. + (self.pipeline.id(), self.asset_id) } #[inline] @@ -211,8 +213,7 @@ impl PhaseItem for Opaque3d { #[inline] fn sort(items: &mut [Self]) { - // Key negated to match reversed SortKey ordering - radsort::sort_by_key(items, |item| -item.distance); + items.sort_unstable_by_key(Self::sort_key); } #[inline] diff --git a/crates/bevy_core_pipeline/src/deferred/mod.rs b/crates/bevy_core_pipeline/src/deferred/mod.rs index a8a56e39a1163b..bd5dfed704c08b 100644 --- a/crates/bevy_core_pipeline/src/deferred/mod.rs +++ b/crates/bevy_core_pipeline/src/deferred/mod.rs @@ -3,8 +3,10 @@ pub mod node; use std::{cmp::Reverse, ops::Range}; +use bevy_asset::AssetId; use bevy_ecs::prelude::*; use bevy_render::{ + mesh::Mesh, render_phase::{CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem}, render_resource::{CachedRenderPipelineId, TextureFormat}, }; @@ -20,8 +22,8 @@ pub const DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT: TextureFormat = TextureFormat: /// /// Used to render all 3D meshes with materials that have no transparency. pub struct Opaque3dDeferred { - pub distance: f32, pub entity: Entity, + pub asset_id: AssetId, pub pipeline_id: CachedRenderPipelineId, pub draw_function: DrawFunctionId, pub batch_range: Range, @@ -29,8 +31,7 @@ pub struct Opaque3dDeferred { } impl PhaseItem for Opaque3dDeferred { - // NOTE: Values increase towards the camera. 
Front-to-back ordering for opaque means we need a descending sort. - type SortKey = Reverse; + type SortKey = (usize, AssetId); #[inline] fn entity(&self) -> Entity { @@ -39,7 +40,8 @@ impl PhaseItem for Opaque3dDeferred { #[inline] fn sort_key(&self) -> Self::SortKey { - Reverse(FloatOrd(self.distance)) + // Sort by pipeline, then by mesh to massively decrease drawcall counts in real scenes. + (self.pipeline_id.id(), self.asset_id) } #[inline] @@ -49,8 +51,7 @@ impl PhaseItem for Opaque3dDeferred { #[inline] fn sort(items: &mut [Self]) { - // Key negated to match reversed SortKey ordering - radsort::sort_by_key(items, |item| -item.distance); + items.sort_unstable_by_key(Self::sort_key); } #[inline] diff --git a/crates/bevy_core_pipeline/src/prepass/mod.rs b/crates/bevy_core_pipeline/src/prepass/mod.rs index c8d38db50f9fd4..43765336b1463e 100644 --- a/crates/bevy_core_pipeline/src/prepass/mod.rs +++ b/crates/bevy_core_pipeline/src/prepass/mod.rs @@ -29,9 +29,11 @@ pub mod node; use std::{cmp::Reverse, ops::Range}; +use bevy_asset::AssetId; use bevy_ecs::prelude::*; use bevy_reflect::Reflect; use bevy_render::{ + mesh::Mesh, render_phase::{CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem}, render_resource::{CachedRenderPipelineId, Extent3d, TextureFormat, TextureView}, texture::ColorAttachment, @@ -109,8 +111,8 @@ impl ViewPrepassTextures { /// /// Used to render all 3D meshes with materials that have no transparency. pub struct Opaque3dPrepass { - pub distance: f32, pub entity: Entity, + pub asset_id: AssetId, pub pipeline_id: CachedRenderPipelineId, pub draw_function: DrawFunctionId, pub batch_range: Range, @@ -118,8 +120,7 @@ pub struct Opaque3dPrepass { } impl PhaseItem for Opaque3dPrepass { - // NOTE: Values increase towards the camera. Front-to-back ordering for opaque means we need a descending sort. 
- type SortKey = Reverse; + type SortKey = (usize, AssetId); #[inline] fn entity(&self) -> Entity { @@ -128,7 +129,8 @@ impl PhaseItem for Opaque3dPrepass { #[inline] fn sort_key(&self) -> Self::SortKey { - Reverse(FloatOrd(self.distance)) + // Sort by pipeline, then by mesh to massively decrease drawcall counts in real scenes. + (self.pipeline_id.id(), self.asset_id) } #[inline] @@ -138,8 +140,7 @@ impl PhaseItem for Opaque3dPrepass { #[inline] fn sort(items: &mut [Self]) { - // Key negated to match reversed SortKey ordering - radsort::sort_by_key(items, |item| -item.distance); + items.sort_unstable_by_key(Self::sort_key); } #[inline] diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index 8e2b3bd71d91b8..c9b3b5ca578f27 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -639,12 +639,12 @@ pub fn queue_material_meshes( mesh_instance.material_bind_group_id = material.get_bind_group_id(); - let distance = rangefinder - .distance_translation(&mesh_instance.transforms.transform.translation) - + material.properties.depth_bias; match material.properties.alpha_mode { AlphaMode::Opaque => { if material.properties.reads_view_transmission_texture { + let distance = rangefinder + .distance_translation(&mesh_instance.transforms.transform.translation) + + material.properties.depth_bias; transmissive_phase.add(Transmissive3d { entity: *visible_entity, draw_function: draw_transmissive_pbr, @@ -658,13 +658,16 @@ pub fn queue_material_meshes( entity: *visible_entity, draw_function: draw_opaque_pbr, pipeline: pipeline_id, - distance, + asset_id: mesh_instance.mesh_asset_id, batch_range: 0..1, dynamic_offset: None, }); } } AlphaMode::Mask(_) => { + let distance = rangefinder + .distance_translation(&mesh_instance.transforms.transform.translation) + + material.properties.depth_bias; if material.properties.reads_view_transmission_texture { transmissive_phase.add(Transmissive3d { entity: *visible_entity, @@ -689,6 +692,9 @@ 
pub fn queue_material_meshes( | AlphaMode::Premultiplied | AlphaMode::Add | AlphaMode::Multiply => { + let distance = rangefinder + .distance_translation(&mesh_instance.transforms.transform.translation) + + material.properties.depth_bias; transparent_phase.add(Transparent3d { entity: *visible_entity, draw_function: draw_transparent_pbr, diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 6c8eef1bf60ab7..00380acdc84ac5 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -835,9 +835,6 @@ pub fn queue_prepass_material_meshes( } }; - let distance = rangefinder - .distance_translation(&mesh_instance.transforms.transform.translation) - + material.properties.depth_bias; match alpha_mode { AlphaMode::Opaque => { if deferred { @@ -848,7 +845,7 @@ pub fn queue_prepass_material_meshes( entity: *visible_entity, draw_function: opaque_draw_deferred, pipeline_id, - distance, + asset_id: mesh_instance.mesh_asset_id, batch_range: 0..1, dynamic_offset: None, }); @@ -857,13 +854,16 @@ pub fn queue_prepass_material_meshes( entity: *visible_entity, draw_function: opaque_draw_prepass, pipeline_id, - distance, + asset_id: mesh_instance.mesh_asset_id, batch_range: 0..1, dynamic_offset: None, }); } } AlphaMode::Mask(_) => { + let distance = rangefinder + .distance_translation(&mesh_instance.transforms.transform.translation) + + material.properties.depth_bias; if deferred { alpha_mask_deferred_phase .as_mut()