diff --git a/voxygen/src/render/buffer.rs b/voxygen/src/render/buffer.rs index f4d80a51cc..5544318053 100644 --- a/voxygen/src/render/buffer.rs +++ b/voxygen/src/render/buffer.rs @@ -45,6 +45,10 @@ impl DynamicBuffer { Self(buffer) } + pub fn new_with_data(device: &wgpu::Device, usage: wgpu::BufferUsage, data: &[T]) -> Self { + Self(Buffer::new(device, usage | wgpu::BufferUsage::COPY_DST, data)) + } + pub fn update(&self, queue: &wgpu::Queue, vals: &[T], offset: usize) { if !vals.is_empty() { queue.write_buffer( diff --git a/voxygen/src/render/instances.rs b/voxygen/src/render/instances.rs index 0638dddbd6..2b1607f4e1 100644 --- a/voxygen/src/render/instances.rs +++ b/voxygen/src/render/instances.rs @@ -15,6 +15,14 @@ impl Instances { } } + pub fn new_with_data(device: &wgpu::Device, data: &[T]) -> Self { + Self { + // TODO: examine if we have Instances that are not updated (e.g. sprites) and if there + // would be any gains from separating those out + buf: DynamicBuffer::new_with_data(device, wgpu::BufferUsage::VERTEX, data), + } + } + // TODO: count vs len naming scheme?? 
pub fn count(&self) -> usize { self.buf.len() } diff --git a/voxygen/src/render/renderer.rs b/voxygen/src/render/renderer.rs index 047f1199b6..f4e343fbf6 100644 --- a/voxygen/src/render/renderer.rs +++ b/voxygen/src/render/renderer.rs @@ -33,7 +33,7 @@ use super::{ }; use common::assets::{self, AssetExt, AssetHandle, ReloadWatcher}; use common_base::span; -use core::convert::TryFrom; +use core::{sync::atomic::{AtomicUsize, Ordering}, convert::TryFrom}; #[cfg(feature = "egui-ui")] use egui_wgpu_backend::wgpu::TextureFormat; use std::sync::Arc; @@ -46,8 +46,8 @@ use vek::*; // TODO: revert to u16 pub type ColLightInfo = (Vec<[u8; 4]>, Vec2); -const QUAD_INDEX_BUFFER_U16_START_VERT_LEN: u16 = 3000; -const QUAD_INDEX_BUFFER_U32_START_VERT_LEN: u32 = 3000; +const QUAD_INDEX_BUFFER_U16_VERT_LEN: u16 = u16::MAX; +const QUAD_INDEX_BUFFER_U32_START_VERT_LEN: u32 = u16::MAX as u32; /// A type that stores all the layouts associated with this renderer that never /// change when the RenderMode is modified. 
@@ -160,6 +160,7 @@ pub struct Renderer { noise_tex: Texture, quad_index_buffer_u16: Buffer, + quad_index_buffer_u32_len: Arc, quad_index_buffer_u32: Buffer, shaders: AssetHandle, @@ -494,7 +495,7 @@ impl Renderer { ); let quad_index_buffer_u16 = - create_quad_index_buffer_u16(&device, QUAD_INDEX_BUFFER_U16_START_VERT_LEN as usize); + create_quad_index_buffer_u16(&device, QUAD_INDEX_BUFFER_U16_VERT_LEN.into()); let quad_index_buffer_u32 = create_quad_index_buffer_u32(&device, QUAD_INDEX_BUFFER_U32_START_VERT_LEN as usize); let mut profiler = wgpu_profiler::GpuProfiler::new(4, queue.get_timestamp_period()); @@ -526,6 +527,7 @@ impl Renderer { quad_index_buffer_u16, quad_index_buffer_u32, + quad_index_buffer_u32_len: Arc::new(AtomicUsize::new(QUAD_INDEX_BUFFER_U32_START_VERT_LEN as usize)), shaders, shaders_watcher, @@ -1258,15 +1260,22 @@ impl Renderer { &mut self, vals: &[T], ) -> Result, RenderError> { - let mut instances = Instances::new(&self.device, vals.len()); - instances.update(&self.queue, vals, 0); - Ok(instances) + Ok(Instances::new_with_data(&self.device, vals)) } - /// Ensure that the quad index buffer is large enough for a quad vertex - /// buffer with this many vertices - pub(super) fn ensure_sufficient_index_length( + /// Create a new set of instances with the provided values lazily (for use off the main + /// thread). + pub fn create_instances_lazy( &mut self, + ) -> impl for<'a> Fn(&'a [T]) -> Instances + Send + Sync { + let device = Arc::clone(&self.device); + move |vals| Instances::new_with_data(&device, &vals) + } + + /// Update the expected index length to be large enough for a quad vertex buffer with this many + /// vertices. 
+ fn update_index_length( + quad_index_buffer_u32_len: &AtomicUsize, // Length of the vert buffer with 4 verts per quad vert_length: usize, ) { @@ -1274,55 +1283,78 @@ impl Renderer { match V::QUADS_INDEX { Some(wgpu::IndexFormat::Uint16) => { - // Make sure the global quad index buffer is large enough - if self.quad_index_buffer_u16.len() < quad_index_length { - // Make sure we aren't over the max - if vert_length > u16::MAX as usize { - panic!( - "Vertex type: {} needs to use a larger index type, length: {}", - core::any::type_name::(), - vert_length - ); - } - self.quad_index_buffer_u16 = - create_quad_index_buffer_u16(&self.device, vert_length); - } + // Index length is always sufficient. }, Some(wgpu::IndexFormat::Uint32) => { // Make sure the global quad index buffer is large enough + let vert_length = quad_index_length.next_power_of_two(); + if u32::try_from(vert_length).ok() <= Some(0) { + panic!( + "More than u32::MAX({}) verts({}) for type({}) using an index buffer!", + u32::MAX, + vert_length, + core::any::type_name::() + ); + } + + // NOTE: This operation is monotonic, so Relaxed is sufficient. + quad_index_buffer_u32_len.fetch_update( + Ordering::Relaxed, + Ordering::Relaxed, + |old_len| (old_len < quad_index_length).then_some(vert_length), + ); + /* let old_len = quad_index_buffer_u32_len.load(); if self.quad_index_buffer_u32.len() < quad_index_length { // Make sure we aren't over the max - if vert_length > u32::MAX as usize { - panic!( - "More than u32::MAX({}) verts({}) for type({}) using an index buffer!", - u32::MAX, - vert_length, - core::any::type_name::() - ); - } self.quad_index_buffer_u32 = create_quad_index_buffer_u32(&self.device, vert_length); - } + } */ }, None => {}, } } + /// Ensure that the quad index buffer is large enough for all quad vertices that we might + /// render. Should be called before using the index buffers. 
Only applies to rendering vertex + /// buffers whose creation synchronizes with the call to this function in some other way. + pub(super) fn ensure_sufficient_index_length(&mut self) { + // Make sure the global quad index buffer is large enough + // + // NOTE: Relaxed ordering is fine due to monotonicity, provided that we synchronize any + // rendered buffers with this function call in some other way. + let vert_length = self.quad_index_buffer_u32_len.load(Ordering::Relaxed); + if self.quad_index_buffer_u32.len() < vert_length { + self.quad_index_buffer_u32 = + create_quad_index_buffer_u32(&self.device, vert_length); + } + } + pub fn create_sprite_verts(&mut self, mesh: Mesh) -> sprite::SpriteVerts { - self.ensure_sufficient_index_length::(sprite::VERT_PAGE_SIZE as usize); + Self::update_index_length::(&self.quad_index_buffer_u32_len, sprite::VERT_PAGE_SIZE as usize); sprite::create_verts_buffer(&self.device, mesh) } /// Create a new model from the provided mesh. /// If the provided mesh is empty this returns None pub fn create_model(&mut self, mesh: &Mesh) -> Option> { - self.ensure_sufficient_index_length::(mesh.vertices().len()); + Self::update_index_length::(&self.quad_index_buffer_u32_len, mesh.vertices().len()); Model::new(&self.device, mesh) } + /// Create a new model from the provided mesh, lazily (for use off the main thread). + /// If the provided mesh is empty this returns None + pub fn create_model_lazy(&mut self) -> impl for<'a> Fn(&'a Mesh) -> Option> + Send + Sync { + let device = Arc::clone(&self.device); + let quad_index_buffer_u32_len = Arc::clone(&self.quad_index_buffer_u32_len); + move |mesh| { + Self::update_index_length::(&quad_index_buffer_u32_len, mesh.vertices().len()); + Model::new(&device, mesh) + } + } + /// Create a new dynamic model with the specified size. 
pub fn create_dynamic_model(&mut self, size: usize) -> DynamicModel { - self.ensure_sufficient_index_length::(size); + Self::update_index_length::(&self.quad_index_buffer_u32_len, size); DynamicModel::new(&self.device, size) } diff --git a/voxygen/src/render/renderer/drawer.rs b/voxygen/src/render/renderer/drawer.rs index 2029b163ae..4722e7717b 100644 --- a/voxygen/src/render/renderer/drawer.rs +++ b/voxygen/src/render/renderer/drawer.rs @@ -87,6 +87,8 @@ impl<'frame> Drawer<'frame> { swap_tex: wgpu::SwapChainTexture, globals: &'frame GlobalsBindGroup, ) -> Self { + renderer.ensure_sufficient_index_length(); + let taking_screenshot = renderer.take_screenshot.take().map(|screenshot_fn| { super::screenshot::TakeScreenshot::new( &renderer.device, diff --git a/voxygen/src/scene/terrain.rs b/voxygen/src/scene/terrain.rs index 5e7815c844..0f680816bf 100644 --- a/voxygen/src/scene/terrain.rs +++ b/voxygen/src/scene/terrain.rs @@ -115,8 +115,8 @@ struct ChunkMeshState { pub struct MeshWorkerResponseMesh { z_bounds: (f32, f32), shadow_z_bounds: (f32, f32), - opaque_mesh: Mesh, - fluid_mesh: Mesh, + opaque_model: Option>, + fluid_model: Option>, col_lights_info: ColLightInfo, light_map: LightMapFn, glow_map: LightMapFn, @@ -126,7 +126,7 @@ pub struct MeshWorkerResponseMesh { /// mesh of a chunk. struct MeshWorkerResponse { pos: Vec2, - sprite_instances: [Vec; SPRITE_LOD_LEVELS], + sprite_instances: [Instances; SPRITE_LOD_LEVELS], /// If None, this update was requested without meshing. 
mesh: Option, started_tick: u64, @@ -238,6 +238,9 @@ fn mesh_worker + RectRasterableVol + ReadVol + Debug + ' range: Aabb, sprite_data: &HashMap<(SpriteKind, usize), [SpriteData; SPRITE_LOD_LEVELS]>, sprite_config: &SpriteSpec, + create_opaque: impl for<'a> Fn(&'a Mesh) -> Option> + Send + Sync, + create_fluid: impl for<'a> Fn(&'a Mesh) -> Option> + Send + Sync, + create_instances: impl for<'a> Fn(&'a [SpriteInstance]) -> Instances + Send + Sync, ) -> MeshWorkerResponse { span!(_guard, "mesh_worker"); let (blocks_of_interest, sprite_kinds) = BlocksOfInterest::from_chunk(&chunk)/*default()*/; @@ -271,8 +274,8 @@ fn mesh_worker + RectRasterableVol + ReadVol + Debug + ' // we can ignore such cases for the purposes of determining a shadow bounding box (but // not visibility, unfortunately). shadow_z_bounds: ((chunk.get_min_z() as f32).max(bounds.min.z), (chunk.get_max_z() as f32).min(bounds.max.z)), - opaque_mesh, - fluid_mesh, + opaque_model: create_opaque(&opaque_mesh), + fluid_model: create_fluid(&fluid_mesh), col_lights_info, light_map, glow_map, @@ -355,7 +358,7 @@ fn mesh_worker + RectRasterableVol + ReadVol + Debug + ' } */ } - instances + instances.map(|instances| create_instances(&instances)) }, mesh, blocks_of_interest, @@ -1201,6 +1204,9 @@ impl Terrain { let sprite_data = Arc::clone(&self.sprite_data); let sprite_config = Arc::clone(&self.sprite_config); let cnt = Arc::clone(&self.mesh_todos_active); + let create_opaque = renderer.create_model_lazy(); + let create_fluid = renderer.create_model_lazy(); + let create_instances = renderer.create_instances_lazy(); cnt.fetch_add(1, Ordering::Relaxed); scene_data .state @@ -1218,6 +1224,9 @@ impl Terrain { aabb, &sprite_data, &sprite_config, + create_opaque, + create_fluid, + create_instances, )); cnt.fetch_sub(1, Ordering::Relaxed); }); @@ -1244,11 +1253,7 @@ impl Terrain { Some(todo) if response.started_tick <= todo.started_tick => { let started_tick = todo.started_tick; - let sprite_instances = 
response.sprite_instances.map(|instances| { - renderer - .create_instances(&instances) - .expect("Failed to upload chunk sprite instances to the GPU!") - }); + let sprite_instances = response.sprite_instances; if let Some(mesh) = response.mesh { // Full update, insert the whole chunk. @@ -1314,8 +1319,8 @@ impl Terrain { self.insert_chunk(response.pos, TerrainChunkData { load_time, - opaque_model: renderer.create_model(&mesh.opaque_mesh), - fluid_model: renderer.create_model(&mesh.fluid_mesh), + opaque_model: mesh.opaque_model, + fluid_model: mesh.fluid_model, col_lights_alloc: Some(allocation.id), col_lights: Arc::clone(&self.col_lights), light_map: mesh.light_map,