Perform nearly all chunk allocations in the same thread that the chunk

is generated in.
This commit is contained in:
Joshua Yanovski 2022-08-04 19:00:42 -07:00
parent 04bb1e32d9
commit c577c21156
5 changed files with 99 additions and 48 deletions

View File

@ -45,6 +45,10 @@ impl<T: Copy + Pod> DynamicBuffer<T> {
Self(buffer) Self(buffer)
} }
/// Creates a dynamic buffer whose initial contents are `data`.
///
/// `COPY_DST` is always OR-ed into the requested `usage` before the
/// underlying buffer is created.
// NOTE(review): presumably `COPY_DST` is required so that later
// `update` calls (which go through `queue.write_buffer`) are valid —
// confirm against the wgpu documentation.
pub fn new_with_data(device: &wgpu::Device, usage: wgpu::BufferUsage, data: &[T]) -> Self {
    let usage = usage | wgpu::BufferUsage::COPY_DST;
    let buffer = Buffer::new(device, usage, data);
    Self(buffer)
}
pub fn update(&self, queue: &wgpu::Queue, vals: &[T], offset: usize) { pub fn update(&self, queue: &wgpu::Queue, vals: &[T], offset: usize) {
if !vals.is_empty() { if !vals.is_empty() {
queue.write_buffer( queue.write_buffer(

View File

@ -15,6 +15,14 @@ impl<T: Copy + Pod> Instances<T> {
} }
} }
/// Creates a set of instances backed by a vertex-usage dynamic buffer that
/// is initialised with `data`.
pub fn new_with_data(device: &wgpu::Device, data: &[T]) -> Self {
    // TODO: examine if we have Instances that are not updated (e.g. sprites) and if there
    // would be any gains from separating those out
    let buf = DynamicBuffer::new_with_data(device, wgpu::BufferUsage::VERTEX, data);
    Self { buf }
}
// TODO: count vs len naming scheme?? // TODO: count vs len naming scheme??
pub fn count(&self) -> usize { self.buf.len() } pub fn count(&self) -> usize { self.buf.len() }

View File

@ -33,7 +33,7 @@ use super::{
}; };
use common::assets::{self, AssetExt, AssetHandle, ReloadWatcher}; use common::assets::{self, AssetExt, AssetHandle, ReloadWatcher};
use common_base::span; use common_base::span;
use core::convert::TryFrom; use core::{sync::atomic::{AtomicUsize, Ordering}, convert::TryFrom};
#[cfg(feature = "egui-ui")] #[cfg(feature = "egui-ui")]
use egui_wgpu_backend::wgpu::TextureFormat; use egui_wgpu_backend::wgpu::TextureFormat;
use std::sync::Arc; use std::sync::Arc;
@ -46,8 +46,8 @@ use vek::*;
// TODO: revert to u16 // TODO: revert to u16
pub type ColLightInfo = (Vec<[u8; 4]>, Vec2<u16>); pub type ColLightInfo = (Vec<[u8; 4]>, Vec2<u16>);
const QUAD_INDEX_BUFFER_U16_START_VERT_LEN: u16 = 3000; const QUAD_INDEX_BUFFER_U16_VERT_LEN: u16 = u16::MAX;
const QUAD_INDEX_BUFFER_U32_START_VERT_LEN: u32 = 3000; const QUAD_INDEX_BUFFER_U32_START_VERT_LEN: u32 = u16::MAX as u32;
/// A type that stores all the layouts associated with this renderer that never /// A type that stores all the layouts associated with this renderer that never
/// change when the RenderMode is modified. /// change when the RenderMode is modified.
@ -160,6 +160,7 @@ pub struct Renderer {
noise_tex: Texture, noise_tex: Texture,
quad_index_buffer_u16: Buffer<u16>, quad_index_buffer_u16: Buffer<u16>,
quad_index_buffer_u32_len: Arc<AtomicUsize>,
quad_index_buffer_u32: Buffer<u32>, quad_index_buffer_u32: Buffer<u32>,
shaders: AssetHandle<Shaders>, shaders: AssetHandle<Shaders>,
@ -494,7 +495,7 @@ impl Renderer {
); );
let quad_index_buffer_u16 = let quad_index_buffer_u16 =
create_quad_index_buffer_u16(&device, QUAD_INDEX_BUFFER_U16_START_VERT_LEN as usize); create_quad_index_buffer_u16(&device, QUAD_INDEX_BUFFER_U16_VERT_LEN.into());
let quad_index_buffer_u32 = let quad_index_buffer_u32 =
create_quad_index_buffer_u32(&device, QUAD_INDEX_BUFFER_U32_START_VERT_LEN as usize); create_quad_index_buffer_u32(&device, QUAD_INDEX_BUFFER_U32_START_VERT_LEN as usize);
let mut profiler = wgpu_profiler::GpuProfiler::new(4, queue.get_timestamp_period()); let mut profiler = wgpu_profiler::GpuProfiler::new(4, queue.get_timestamp_period());
@ -526,6 +527,7 @@ impl Renderer {
quad_index_buffer_u16, quad_index_buffer_u16,
quad_index_buffer_u32, quad_index_buffer_u32,
quad_index_buffer_u32_len: Arc::new(AtomicUsize::new(QUAD_INDEX_BUFFER_U32_START_VERT_LEN as usize)),
shaders, shaders,
shaders_watcher, shaders_watcher,
@ -1258,15 +1260,22 @@ impl Renderer {
&mut self, &mut self,
vals: &[T], vals: &[T],
) -> Result<Instances<T>, RenderError> { ) -> Result<Instances<T>, RenderError> {
let mut instances = Instances::new(&self.device, vals.len()); Ok(Instances::new_with_data(&self.device, vals))
instances.update(&self.queue, vals, 0);
Ok(instances)
} }
/// Ensure that the quad index buffer is large enough for a quad vertex /// Create a new set of instances with the provided values lazily (for use off the main
/// buffer with this many vertices /// thread).
pub(super) fn ensure_sufficient_index_length<V: Vertex>( pub fn create_instances_lazy<T: Copy + bytemuck::Pod>(
&mut self, &mut self,
) -> impl for<'a> Fn(&'a [T]) -> Instances<T> + Send + Sync {
let device = Arc::clone(&self.device);
move |vals| Instances::new_with_data(&device, &vals)
}
/// Update the expected index length to be large enough for a quad vertex buffer with this many
/// vertices.
fn update_index_length<V: Vertex>(
quad_index_buffer_u32_len: &AtomicUsize,
// Length of the vert buffer with 4 verts per quad // Length of the vert buffer with 4 verts per quad
vert_length: usize, vert_length: usize,
) { ) {
@ -1274,55 +1283,78 @@ impl Renderer {
match V::QUADS_INDEX { match V::QUADS_INDEX {
Some(wgpu::IndexFormat::Uint16) => { Some(wgpu::IndexFormat::Uint16) => {
// Make sure the global quad index buffer is large enough // Index length is always sufficient.
if self.quad_index_buffer_u16.len() < quad_index_length {
// Make sure we aren't over the max
if vert_length > u16::MAX as usize {
panic!(
"Vertex type: {} needs to use a larger index type, length: {}",
core::any::type_name::<V>(),
vert_length
);
}
self.quad_index_buffer_u16 =
create_quad_index_buffer_u16(&self.device, vert_length);
}
}, },
Some(wgpu::IndexFormat::Uint32) => { Some(wgpu::IndexFormat::Uint32) => {
// Make sure the global quad index buffer is large enough // Make sure the global quad index buffer is large enough
let vert_length = quad_index_length.next_power_of_two();
if u32::try_from(vert_length).ok() <= Some(0) {
panic!(
"More than u32::MAX({}) verts({}) for type({}) using an index buffer!",
u32::MAX,
vert_length,
core::any::type_name::<V>()
);
}
// NOTE: This operation is monotonic, so Relaxed is sufficient.
quad_index_buffer_u32_len.fetch_update(
Ordering::Relaxed,
Ordering::Relaxed,
|old_len| (old_len < quad_index_length).then_some(vert_length),
);
/* let old_len = quad_index_buffer_u32_len.load();
if self.quad_index_buffer_u32.len() < quad_index_length { if self.quad_index_buffer_u32.len() < quad_index_length {
// Make sure we aren't over the max // Make sure we aren't over the max
if vert_length > u32::MAX as usize {
panic!(
"More than u32::MAX({}) verts({}) for type({}) using an index buffer!",
u32::MAX,
vert_length,
core::any::type_name::<V>()
);
}
self.quad_index_buffer_u32 = self.quad_index_buffer_u32 =
create_quad_index_buffer_u32(&self.device, vert_length); create_quad_index_buffer_u32(&self.device, vert_length);
} } */
}, },
None => {}, None => {},
} }
} }
/// Grow the u32 quad index buffer, if needed, so that it covers the largest
/// index length recorded so far in `quad_index_buffer_u32_len`.
///
/// Should be called before using the index buffers. The guarantee only
/// extends to vertex buffers whose creation is synchronized with this call
/// by some other means.
pub(super) fn ensure_sufficient_index_length(&mut self) {
    // NOTE: Relaxed ordering is fine due to monotonicity, provided that we synchronize any
    // rendered buffers with this function call in some other way.
    let required_len = self.quad_index_buffer_u32_len.load(Ordering::Relaxed);

    // Nothing to do when the current buffer is already long enough.
    if required_len <= self.quad_index_buffer_u32.len() {
        return;
    }

    // Replace the global quad index buffer with one of the required length.
    self.quad_index_buffer_u32 = create_quad_index_buffer_u32(&self.device, required_len);
}
pub fn create_sprite_verts(&mut self, mesh: Mesh<sprite::Vertex>) -> sprite::SpriteVerts { pub fn create_sprite_verts(&mut self, mesh: Mesh<sprite::Vertex>) -> sprite::SpriteVerts {
self.ensure_sufficient_index_length::<sprite::Vertex>(sprite::VERT_PAGE_SIZE as usize); Self::update_index_length::<sprite::Vertex>(&self.quad_index_buffer_u32_len, sprite::VERT_PAGE_SIZE as usize);
sprite::create_verts_buffer(&self.device, mesh) sprite::create_verts_buffer(&self.device, mesh)
} }
/// Create a new model from the provided mesh. /// Create a new model from the provided mesh.
/// If the provided mesh is empty this returns None /// If the provided mesh is empty this returns None
pub fn create_model<V: Vertex>(&mut self, mesh: &Mesh<V>) -> Option<Model<V>> { pub fn create_model<V: Vertex>(&mut self, mesh: &Mesh<V>) -> Option<Model<V>> {
self.ensure_sufficient_index_length::<V>(mesh.vertices().len()); Self::update_index_length::<V>(&self.quad_index_buffer_u32_len, mesh.vertices().len());
Model::new(&self.device, mesh) Model::new(&self.device, mesh)
} }
/// Returns a closure that creates a model from a mesh, intended for use off
/// the main thread.
///
/// The closure owns clones of the device handle and of the shared u32 index
/// length counter. Each call first records the mesh's vertex count via
/// `update_index_length` and then builds the model with `Model::new`.
///
/// If the provided mesh is empty the closure returns None.
pub fn create_model_lazy<V: Vertex>(&mut self) -> impl for<'a> Fn(&'a Mesh<V>) -> Option<Model<V>> + Send + Sync {
    let index_len = Arc::clone(&self.quad_index_buffer_u32_len);
    let device = Arc::clone(&self.device);
    move |mesh| {
        let vert_count = mesh.vertices().len();
        Self::update_index_length::<V>(&index_len, vert_count);
        Model::new(&device, mesh)
    }
}
/// Create a new dynamic model with the specified size. /// Create a new dynamic model with the specified size.
pub fn create_dynamic_model<V: Vertex>(&mut self, size: usize) -> DynamicModel<V> { pub fn create_dynamic_model<V: Vertex>(&mut self, size: usize) -> DynamicModel<V> {
self.ensure_sufficient_index_length::<V>(size); Self::update_index_length::<V>(&self.quad_index_buffer_u32_len, size);
DynamicModel::new(&self.device, size) DynamicModel::new(&self.device, size)
} }

View File

@ -87,6 +87,8 @@ impl<'frame> Drawer<'frame> {
swap_tex: wgpu::SwapChainTexture, swap_tex: wgpu::SwapChainTexture,
globals: &'frame GlobalsBindGroup, globals: &'frame GlobalsBindGroup,
) -> Self { ) -> Self {
renderer.ensure_sufficient_index_length();
let taking_screenshot = renderer.take_screenshot.take().map(|screenshot_fn| { let taking_screenshot = renderer.take_screenshot.take().map(|screenshot_fn| {
super::screenshot::TakeScreenshot::new( super::screenshot::TakeScreenshot::new(
&renderer.device, &renderer.device,

View File

@ -115,8 +115,8 @@ struct ChunkMeshState {
pub struct MeshWorkerResponseMesh { pub struct MeshWorkerResponseMesh {
z_bounds: (f32, f32), z_bounds: (f32, f32),
shadow_z_bounds: (f32, f32), shadow_z_bounds: (f32, f32),
opaque_mesh: Mesh<TerrainVertex>, opaque_model: Option<Model<TerrainVertex>>,
fluid_mesh: Mesh<FluidVertex>, fluid_model: Option<Model<FluidVertex>>,
col_lights_info: ColLightInfo, col_lights_info: ColLightInfo,
light_map: LightMapFn, light_map: LightMapFn,
glow_map: LightMapFn, glow_map: LightMapFn,
@ -126,7 +126,7 @@ pub struct MeshWorkerResponseMesh {
/// mesh of a chunk. /// mesh of a chunk.
struct MeshWorkerResponse { struct MeshWorkerResponse {
pos: Vec2<i32>, pos: Vec2<i32>,
sprite_instances: [Vec<SpriteInstance>; SPRITE_LOD_LEVELS], sprite_instances: [Instances<SpriteInstance>; SPRITE_LOD_LEVELS],
/// If None, this update was requested without meshing. /// If None, this update was requested without meshing.
mesh: Option<MeshWorkerResponseMesh>, mesh: Option<MeshWorkerResponseMesh>,
started_tick: u64, started_tick: u64,
@ -238,6 +238,9 @@ fn mesh_worker<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug + '
range: Aabb<i32>, range: Aabb<i32>,
sprite_data: &HashMap<(SpriteKind, usize), [SpriteData; SPRITE_LOD_LEVELS]>, sprite_data: &HashMap<(SpriteKind, usize), [SpriteData; SPRITE_LOD_LEVELS]>,
sprite_config: &SpriteSpec, sprite_config: &SpriteSpec,
create_opaque: impl for<'a> Fn(&'a Mesh<TerrainVertex>) -> Option<Model<TerrainVertex>> + Send + Sync,
create_fluid: impl for<'a> Fn(&'a Mesh<FluidVertex>) -> Option<Model<FluidVertex>> + Send + Sync,
create_instances: impl for<'a> Fn(&'a [SpriteInstance]) -> Instances<SpriteInstance> + Send + Sync,
) -> MeshWorkerResponse { ) -> MeshWorkerResponse {
span!(_guard, "mesh_worker"); span!(_guard, "mesh_worker");
let (blocks_of_interest, sprite_kinds) = BlocksOfInterest::from_chunk(&chunk)/*default()*/; let (blocks_of_interest, sprite_kinds) = BlocksOfInterest::from_chunk(&chunk)/*default()*/;
@ -271,8 +274,8 @@ fn mesh_worker<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug + '
// we can ignore such cases for the purposes of determining a shadow bounding box (but // we can ignore such cases for the purposes of determining a shadow bounding box (but
// not visibility, unfortunately). // not visibility, unfortunately).
shadow_z_bounds: ((chunk.get_min_z() as f32).max(bounds.min.z), (chunk.get_max_z() as f32).min(bounds.max.z)), shadow_z_bounds: ((chunk.get_min_z() as f32).max(bounds.min.z), (chunk.get_max_z() as f32).min(bounds.max.z)),
opaque_mesh, opaque_model: create_opaque(&opaque_mesh),
fluid_mesh, fluid_model: create_fluid(&fluid_mesh),
col_lights_info, col_lights_info,
light_map, light_map,
glow_map, glow_map,
@ -355,7 +358,7 @@ fn mesh_worker<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug + '
} */ } */
} }
instances instances.map(|instances| create_instances(&instances))
}, },
mesh, mesh,
blocks_of_interest, blocks_of_interest,
@ -1201,6 +1204,9 @@ impl<V: RectRasterableVol> Terrain<V> {
let sprite_data = Arc::clone(&self.sprite_data); let sprite_data = Arc::clone(&self.sprite_data);
let sprite_config = Arc::clone(&self.sprite_config); let sprite_config = Arc::clone(&self.sprite_config);
let cnt = Arc::clone(&self.mesh_todos_active); let cnt = Arc::clone(&self.mesh_todos_active);
let create_opaque = renderer.create_model_lazy();
let create_fluid = renderer.create_model_lazy();
let create_instances = renderer.create_instances_lazy();
cnt.fetch_add(1, Ordering::Relaxed); cnt.fetch_add(1, Ordering::Relaxed);
scene_data scene_data
.state .state
@ -1218,6 +1224,9 @@ impl<V: RectRasterableVol> Terrain<V> {
aabb, aabb,
&sprite_data, &sprite_data,
&sprite_config, &sprite_config,
create_opaque,
create_fluid,
create_instances,
)); ));
cnt.fetch_sub(1, Ordering::Relaxed); cnt.fetch_sub(1, Ordering::Relaxed);
}); });
@ -1244,11 +1253,7 @@ impl<V: RectRasterableVol> Terrain<V> {
Some(todo) if response.started_tick <= todo.started_tick => { Some(todo) if response.started_tick <= todo.started_tick => {
let started_tick = todo.started_tick; let started_tick = todo.started_tick;
let sprite_instances = response.sprite_instances.map(|instances| { let sprite_instances = response.sprite_instances;
renderer
.create_instances(&instances)
.expect("Failed to upload chunk sprite instances to the GPU!")
});
if let Some(mesh) = response.mesh { if let Some(mesh) = response.mesh {
// Full update, insert the whole chunk. // Full update, insert the whole chunk.
@ -1314,8 +1319,8 @@ impl<V: RectRasterableVol> Terrain<V> {
self.insert_chunk(response.pos, TerrainChunkData { self.insert_chunk(response.pos, TerrainChunkData {
load_time, load_time,
opaque_model: renderer.create_model(&mesh.opaque_mesh), opaque_model: mesh.opaque_model,
fluid_model: renderer.create_model(&mesh.fluid_mesh), fluid_model: mesh.fluid_model,
col_lights_alloc: Some(allocation.id), col_lights_alloc: Some(allocation.id),
col_lights: Arc::clone(&self.col_lights), col_lights: Arc::clone(&self.col_lights),
light_map: mesh.light_map, light_map: mesh.light_map,