From 6adfa6680fe29f1818444e186216091a41d5c23d Mon Sep 17 00:00:00 2001 From: Joshua Yanovski Date: Tue, 16 Aug 2022 21:32:03 -0700 Subject: [PATCH] All unmaps on the main thread, sprites consolidated into a buffer. --- voxygen/src/menu/main/scene.rs | 6 +- voxygen/src/mesh/greedy.rs | 42 +++++++--- voxygen/src/mesh/terrain.rs | 13 ++- voxygen/src/render/buffer.rs | 84 ++++++++++--------- voxygen/src/render/consts.rs | 14 ++-- voxygen/src/render/instances.rs | 32 ++++---- voxygen/src/render/mod.rs | 3 +- voxygen/src/render/model.rs | 31 ++++++- voxygen/src/render/pipelines/sprite.rs | 3 + voxygen/src/render/renderer.rs | 109 +++++++++++++++---------- voxygen/src/render/renderer/binding.rs | 23 ++---- voxygen/src/render/renderer/drawer.rs | 4 +- voxygen/src/render/texture.rs | 2 +- voxygen/src/scene/figure/cache.rs | 7 +- voxygen/src/scene/mod.rs | 6 +- voxygen/src/scene/simple.rs | 14 +++- voxygen/src/scene/terrain.rs | 103 +++++++++++++++-------- 17 files changed, 308 insertions(+), 188 deletions(-) diff --git a/voxygen/src/menu/main/scene.rs b/voxygen/src/menu/main/scene.rs index 68d25b8f47..6612242a0e 100644 --- a/voxygen/src/menu/main/scene.rs +++ b/voxygen/src/menu/main/scene.rs @@ -10,9 +10,9 @@ pub struct Scene { impl Scene { pub fn new(renderer: &mut Renderer) -> Self { let global_data = GlobalModel { - globals: renderer.create_consts(&[Globals::default()]), - lights: renderer.create_consts(&[Light::default(); 32]), - shadows: renderer.create_consts(&[Shadow::default(); 32]), + globals: renderer.create_consts(wgpu::BufferUsage::COPY_DST, &[Globals::default()]), + lights: renderer.create_consts(wgpu::BufferUsage::COPY_DST, &[Light::default(); 32]), + shadows: renderer.create_consts(wgpu::BufferUsage::COPY_DST, &[Shadow::default(); 32]), shadow_mats: renderer.create_shadow_bound_locals(&[ShadowLocals::default()]), rain_occlusion_mats: renderer .create_rain_occlusion_bound_locals(&[RainOcclusionLocals::default()]), diff --git a/voxygen/src/mesh/greedy.rs 
b/voxygen/src/mesh/greedy.rs index cc90ce34f0..b5fc32f87c 100644 --- a/voxygen/src/mesh/greedy.rs +++ b/voxygen/src/mesh/greedy.rs @@ -81,7 +81,7 @@ pub struct GreedyConfig { /// coloring part as a continuation. When called with a final tile size and /// vector, the continuation will consume the color data and write it to the /// vector. -pub type SuspendedMesh<'a> = dyn for<'r> FnOnce(&'r mut ColLightInfo) + 'a; +pub type SuspendedMesh<'a> = dyn for<'r> FnOnce(/*&'r mut ColLightInfo*/(&'r mut [[u8; 4]], Vec2)) + 'a; /// Abstraction over different atlas allocators. Useful to swap out the /// allocator implementation for specific cases (e.g. sprites). @@ -418,24 +418,41 @@ impl<'a, Allocator: AtlasAllocator> GreedyMesh<'a, Allocator> { /// are known, we can perform just a single allocation to construct a /// precisely fitting atlas. This will also let us (in the future) /// suspend meshing partway through in order to meet frame budget, and - /// potentially use a single staged upload to the GPU. + /// allows us to use a single staged upload to the GPU. /// - /// Returns the ColLightsInfo corresponding to the constructed atlas. - pub fn finalize(self, alignment: Vec2) -> ColLightInfo { - span!(_guard, "finalize", "GreedyMesh::finalize"); + /// The caller allocates the buffer to which we draw (which may be + /// either a staging buffer for upload to the GPU, or any other suitably sized slice). + /// + /// Returns a tuple containing the size of the required buffer, and a function that, when + /// applied to a buffer allocated with that size, will produce the correct bounds for the + /// texture (which can then be bundled up into a ColLightInfo, if need be). The reason + /// for this awkward API is to allow consumers to create a mapped buffer with the correct + /// size, then write to it directly, rather than introducing a second staging copy. 
+ pub fn finalize( + self, + alignment: Vec2, + ) -> (usize, impl for<'b> FnOnce(&'b mut [[u8; 4]]) -> Vec2 + 'a) + { let mut cur_size = self.col_lights_size; // Round to nearest alignment (assuming power of 2) cur_size.x = (cur_size.x + alignment.x - 1) / alignment.x * alignment.x; cur_size.y = (cur_size.y + alignment.y - 1) / alignment.y * alignment.y; + /* let col_lights = make_buffer(cur_size.x as usize * cur_size.y as usize); let col_lights = vec![ TerrainVertex::make_col_light(254, 0, Rgb::broadcast(254), true); cur_size.x as usize * cur_size.y as usize - ]; - let mut col_lights_info = (col_lights, cur_size); - self.suspended.into_iter().for_each(|cont| { - cont(&mut col_lights_info); - }); - col_lights_info + ]; */ + let alloc_size = cur_size.x as usize * cur_size.y as usize; + (alloc_size, move |col_lights| { + span!(_guard, "finalize", "GreedyMesh::finalize"); + assert!(col_lights.len() == alloc_size); + self.suspended.into_iter().for_each(move |cont| { + let col_lights_info = (&mut *col_lights, cur_size); + cont(/*&mut */col_lights_info); + }); + /* col_lights_info */ + cur_size + }) } pub fn max_size(&self) -> Vec2 { self.max_size } @@ -783,7 +800,7 @@ fn add_to_atlas( // // TODO: See if we can speed this up using SIMD. 
fn draw_col_lights( - (col_lights, cur_size): &mut ColLightInfo, + (col_lights, cur_size): /*&mut ColLightInfo*/(&mut [[u8; 4]], Vec2), data: &mut D, todo_rects: Vec, draw_delta: Vec3, @@ -793,6 +810,7 @@ fn draw_col_lights( mut get_opacity: impl FnMut(&mut D, Vec3) -> bool, mut make_face_texel: impl FnMut(&mut D, Vec3, u8, u8, bool) -> [u8; 4], ) { + let col_lights = &mut col_lights[0..cur_size.y as usize * cur_size.x as usize]; todo_rects.into_iter().for_each(|(pos, uv, rect, delta)| { // NOTE: Conversions are safe because width, height, and offset must be // non-negative, and because every allocated coordinate in the atlas must be in diff --git a/voxygen/src/mesh/terrain.rs b/voxygen/src/mesh/terrain.rs index 918ba14ba0..eb84c54aa1 100644 --- a/voxygen/src/mesh/terrain.rs +++ b/voxygen/src/mesh/terrain.rs @@ -5,7 +5,7 @@ use crate::{ greedy::{self, GreedyConfig, GreedyMesh}, MeshGen, }, - render::{ColLightInfo, FluidVertex, Mesh, TerrainVertex}, + render::{ColLightInfo, FluidVertex, Mesh, Model, TerrainVertex}, scene::terrain::BlocksOfInterest, }; use common::{ @@ -332,6 +332,7 @@ type V = TerrainChunk; #[inline(always)] pub fn generate_mesh<'a/*, V: RectRasterableVol + ReadVol + Debug + 'static*/>( vol: &'a VolGrid2d, + create_texture: impl Fn(usize) -> Option>, (range, max_texture_size, boi): (Aabb, Vec2, &'a BlocksOfInterest), ) -> MeshGen< TerrainVertex, @@ -339,7 +340,7 @@ pub fn generate_mesh<'a/*, V: RectRasterableVol + ReadVol + Debug + TerrainVertex, ( Aabb, - ColLightInfo, + /*ColLightInfo*/(Option>, Vec2), Arc) -> f32 + Send + Sync>, Arc) -> f32 + Send + Sync>, ), @@ -997,10 +998,14 @@ pub fn generate_mesh<'a/*, V: RectRasterableVol + ReadVol + Debug + max: max_bounds + min_bounds, }; // WGPU requires this alignment. 
- let (col_lights, col_lights_size) = greedy.finalize( + let /*(col_lights, col_lights_size)*/(col_lights_alloc_size, finalize) = greedy.finalize( Vec2::new((wgpu::COPY_BYTES_PER_ROW_ALIGNMENT / 4) as u16, 1), ); - + // Allocate the fresh mesh. + let mut col_lights = create_texture(col_lights_alloc_size); + let col_lights_size = col_lights.as_mut().map(|col_lights| { + finalize(bytemuck::cast_slice_mut(&mut col_lights.get_mapped_mut(0, col_lights.len()))) + }).unwrap_or(Vec2::broadcast(0)); ( opaque_mesh, fluid_mesh, diff --git a/voxygen/src/render/buffer.rs b/voxygen/src/render/buffer.rs index 902c53a0aa..142bead1b5 100644 --- a/voxygen/src/render/buffer.rs +++ b/voxygen/src/render/buffer.rs @@ -16,14 +16,18 @@ impl Buffer { label: None, mapped_at_creation: true, size: len as u64 * std::mem::size_of::() as u64, - usage: usage | wgpu::BufferUsage::COPY_DST, + usage: usage, }), len, phantom_data: std::marker::PhantomData, } } - pub fn new(device: &wgpu::Device, usage: wgpu::BufferUsage, data: &[T]) -> Self { + /// NOTE: Queue is not *explicitly* used here, but it is implicitly used during the unmap + /// (within wgpu internals) when mapped at creation, which is called by create_buffer_init, + /// and requires acquiring a lock on it, so it's left in the API to deter people from using + /// it when the queue isn't available. 
+ pub fn new(device: &wgpu::Device, _queue: &wgpu::Queue, usage: wgpu::BufferUsage, data: &[T]) -> Self { let contents = bytemuck::cast_slice(data); Self { @@ -39,42 +43,6 @@ impl Buffer { #[allow(clippy::len_without_is_empty)] pub fn len(&self) -> usize { self.len } -} - -pub struct DynamicBuffer(Buffer); - -impl DynamicBuffer { - pub fn new(device: &wgpu::Device, len: usize, usage: wgpu::BufferUsage) -> Self { - let buffer = Buffer { - buf: device.create_buffer(&wgpu::BufferDescriptor { - label: None, - mapped_at_creation: false, - size: len as u64 * std::mem::size_of::() as u64, - usage: usage | wgpu::BufferUsage::COPY_DST, - }), - len, - phantom_data: std::marker::PhantomData, - }; - Self(buffer) - } - - pub fn new_with_data(device: &wgpu::Device, usage: wgpu::BufferUsage, data: &[T]) -> Self { - Self(Buffer::new(device, usage | wgpu::BufferUsage::COPY_DST, data)) - } - - pub fn new_mapped(device: &wgpu::Device, len: usize, usage: wgpu::BufferUsage) -> Self { - Self(Buffer::new_mapped(device, len, usage | wgpu::BufferUsage::COPY_DST)) - } - - pub fn update(&self, queue: &wgpu::Queue, vals: &[T], offset: usize) { - if !vals.is_empty() { - queue.write_buffer( - &self.buf, - offset as u64 * std::mem::size_of::() as u64, - bytemuck::cast_slice(vals), - ) - } - } /// Get the GPU-side mapped slice represented by this buffer handle, if it was previously /// memory mapped. @@ -100,8 +68,8 @@ impl DynamicBuffer { /// unmapped), either directly or via [Buffer::new_mapped]. /// /// NOTE: Queue is not *explicitly* used here, but it is implicitly used during the unmap - /// (within wgpu internals) and requires acquiring a lock on it, so it's left in the API to - /// deter people from using it when the queue isn't available. + /// (within wgpu internals) when mapped at creation, and requires acquiring a lock on it, + /// so it's left in the API to deter people from using it when the queue isn't available. 
pub fn unmap(&self, _queue: &wgpu::Queue/* , vals: &[T], offset: usize */) { /* if !vals.is_empty() { let contents = bytemuck::cast_slice(vals); @@ -117,6 +85,42 @@ impl DynamicBuffer { } } +pub struct DynamicBuffer(Buffer); + +impl DynamicBuffer { + pub fn new(device: &wgpu::Device, len: usize, usage: wgpu::BufferUsage) -> Self { + let buffer = Buffer { + buf: device.create_buffer(&wgpu::BufferDescriptor { + label: None, + mapped_at_creation: false, + size: len as u64 * std::mem::size_of::() as u64, + usage: usage, + }), + len, + phantom_data: std::marker::PhantomData, + }; + Self(buffer) + } + + pub fn new_with_data(device: &wgpu::Device, queue: &wgpu::Queue, usage: wgpu::BufferUsage, data: &[T]) -> Self { + Self(Buffer::new(device, queue, usage, data)) + } + + pub fn new_mapped(device: &wgpu::Device, len: usize, usage: wgpu::BufferUsage) -> Self { + Self(Buffer::new_mapped(device, len, usage)) + } + + pub fn update(&self, queue: &wgpu::Queue, vals: &[T], offset: usize) { + if !vals.is_empty() { + queue.write_buffer( + &self.buf, + offset as u64 * std::mem::size_of::() as u64, + bytemuck::cast_slice(vals), + ) + } + } +} + impl std::ops::Deref for DynamicBuffer { type Target = Buffer; diff --git a/voxygen/src/render/consts.rs b/voxygen/src/render/consts.rs index 2b7d0f826f..b0e1c1f737 100644 --- a/voxygen/src/render/consts.rs +++ b/voxygen/src/render/consts.rs @@ -10,27 +10,25 @@ pub struct Consts { impl Consts { /// Create a new `Const`. 
- pub fn new(device: &wgpu::Device, len: usize) -> Self { + pub fn new(device: &wgpu::Device, usage: wgpu::BufferUsage, len: usize) -> Self { Self { // TODO: examine if all our consts need to be updatable - buf: DynamicBuffer::new(device, len, wgpu::BufferUsage::UNIFORM), + buf: DynamicBuffer::new(device, len, wgpu::BufferUsage::COPY_DST | wgpu::BufferUsage::UNIFORM), } } - pub fn new_with_data(device: &wgpu::Device, data: &[T]) -> Self { + pub fn new_with_data(device: &wgpu::Device, queue: &wgpu::Queue, usage: wgpu::BufferUsage, data: &[T]) -> Self { Self { - // TODO: examine if all our consts need to be updatable - buf: DynamicBuffer::new_with_data(device, wgpu::BufferUsage::UNIFORM, data), + buf: DynamicBuffer::new_with_data(device, queue, usage | wgpu::BufferUsage::UNIFORM, data), } } /// Create a new `Const` that is mapped at creation. /// /// Warning: buffer must be unmapped before attempting to use this buffer on the GPU! - pub fn new_mapped(device: &wgpu::Device, len: usize) -> Self { + pub fn new_mapped(device: &wgpu::Device, usage: wgpu::BufferUsage, len: usize) -> Self { Self { - // TODO: examine if all our consts need to be updatable - buf: DynamicBuffer::new_mapped(device, len, wgpu::BufferUsage::UNIFORM), + buf: DynamicBuffer::new_mapped(device, len, usage | wgpu::BufferUsage::UNIFORM), } } diff --git a/voxygen/src/render/instances.rs b/voxygen/src/render/instances.rs index 2b1607f4e1..afb02beec7 100644 --- a/voxygen/src/render/instances.rs +++ b/voxygen/src/render/instances.rs @@ -1,34 +1,38 @@ -use super::buffer::DynamicBuffer; +use super::buffer::Buffer; use bytemuck::Pod; /// Represents a mesh that has been sent to the GPU. pub struct Instances { - buf: DynamicBuffer, + buf: Buffer, } impl Instances { - pub fn new(device: &wgpu::Device, len: usize) -> Self { + pub fn new_mapped(device: &wgpu::Device, len: usize) -> Self { Self { - // TODO: examine if we have Instances that are not updated (e.g. 
sprites) and if there - // would be any gains from separating those out - buf: DynamicBuffer::new(device, len, wgpu::BufferUsage::VERTEX), + buf: Buffer::new_mapped(device, len, wgpu::BufferUsage::VERTEX), } } - pub fn new_with_data(device: &wgpu::Device, data: &[T]) -> Self { + pub fn new_with_data(device: &wgpu::Device, queue: &wgpu::Queue, data: &[T]) -> Self { Self { - // TODO: examine if we have Instances that are not updated (e.g. sprites) and if there - // would be any gains from separating those out - buf: DynamicBuffer::new_with_data(device, wgpu::BufferUsage::VERTEX, data), + buf: Buffer::new(device, queue, wgpu::BufferUsage::VERTEX, data), } } + /// Get the GPU-side mapped slice represented by this instances buffer, if it was previously + /// memory mapped. + pub fn get_mapped_mut(&self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferViewMut<'_> { + self.buf.get_mapped_mut(offset, len) + } + + /// Unmaps the GPU-side handle represented by this instances buffer, if it was previously + /// memory-mapped. + pub fn unmap(&self, queue: &wgpu::Queue) { + self.buf.unmap(queue); + } + // TODO: count vs len naming scheme?? 
pub fn count(&self) -> usize { self.buf.len() } - pub fn update(&mut self, queue: &wgpu::Queue, vals: &[T], offset: usize) { - self.buf.update(queue, vals, offset) - } - pub fn buf(&self) -> &wgpu::Buffer { &self.buf.buf } } diff --git a/voxygen/src/render/mod.rs b/voxygen/src/render/mod.rs index 43cc32a836..2bcaacf2d5 100644 --- a/voxygen/src/render/mod.rs +++ b/voxygen/src/render/mod.rs @@ -35,7 +35,8 @@ pub use self::{ skybox::{create_mesh as create_skybox_mesh, Vertex as SkyboxVertex}, sprite::{ Instance as SpriteInstance, SpriteGlobalsBindGroup, SpriteVerts, - Vertex as SpriteVertex, VERT_PAGE_SIZE as SPRITE_VERT_PAGE_SIZE, + Vertex as SpriteVertex, LOD_LEVELS as SPRITE_LOD_LEVELS, + VERT_PAGE_SIZE as SPRITE_VERT_PAGE_SIZE, }, terrain::{Locals as TerrainLocals, TerrainLayout, Vertex as TerrainVertex}, trail::Vertex as TrailVertex, diff --git a/voxygen/src/render/model.rs b/voxygen/src/render/model.rs index 7ae8d83f69..ace622e87b 100644 --- a/voxygen/src/render/model.rs +++ b/voxygen/src/render/model.rs @@ -30,16 +30,41 @@ pub struct Model { impl Model { /// Returns None if the provided mesh is empty - pub fn new(device: &wgpu::Device, usage: wgpu::BufferUsage, mesh: &Mesh) -> Option { + pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, usage: wgpu::BufferUsage, mesh: &Mesh) -> Option { if mesh.vertices().is_empty() { return None; } Some(Self { - vbuf: Buffer::new(device, /*wgpu::BufferUsage::VERTEX*/usage, mesh.vertices()), + vbuf: Buffer::new(device, queue, /*wgpu::BufferUsage::VERTEX*/usage, mesh.vertices()), }) } + /// Create a new `Model` that is mapped at creation. Returns None if `len` is zero. + /// + /// Warning: buffer must be unmapped before attempting to use this buffer on the GPU! 
+ pub fn new_mapped(device: &wgpu::Device, len: usize, usage: wgpu::BufferUsage) -> Option { + if len == 0 { + return None; + } + + Some(Self { + vbuf: Buffer::new_mapped(device, len, /*wgpu::BufferUsage::VERTEX*/usage/*, mesh.vertices()*/), + }) + } + + /// Get the GPU-side mapped slice represented by this model handle, if it was previously + /// memory mapped. + pub fn get_mapped_mut(&self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferViewMut<'_> { + self.vbuf.get_mapped_mut(offset, len) + } + + /// Unmaps the GPU-side handle represented by this model handle, if it was previously + /// memory-mapped. + pub fn unmap(&self, queue: &wgpu::Queue) { + self.vbuf.unmap(queue); + } + /// Create a model with a slice of a portion of this model to send to the /// renderer. pub fn submodel(&self, vertex_range: Range) -> SubModel { @@ -64,7 +89,7 @@ pub struct DynamicModel { impl DynamicModel { pub fn new(device: &wgpu::Device, size: usize) -> Self { Self { - vbuf: DynamicBuffer::new(device, size, wgpu::BufferUsage::VERTEX), + vbuf: DynamicBuffer::new(device, size, wgpu::BufferUsage::VERTEX | wgpu::BufferUsage::COPY_DST), } } diff --git a/voxygen/src/render/pipelines/sprite.rs b/voxygen/src/render/pipelines/sprite.rs index c309a50bfb..8ea6aec0da 100644 --- a/voxygen/src/render/pipelines/sprite.rs +++ b/voxygen/src/render/pipelines/sprite.rs @@ -9,6 +9,7 @@ use std::mem; use vek::*; pub const VERT_PAGE_SIZE: u32 = 256; +pub const LOD_LEVELS: usize = 5; #[repr(C)] #[derive(Copy, Clone, Debug, Zeroable, Pod)] @@ -81,11 +82,13 @@ pub struct SpriteVerts(Buffer); pub(in super::super) fn create_verts_buffer( device: &wgpu::Device, + queue: &wgpu::Queue, mesh: Mesh, ) -> SpriteVerts { // TODO: type Buffer by wgpu::BufferUsage SpriteVerts(Buffer::new( device, + queue, wgpu::BufferUsage::STORAGE, mesh.vertices(), )) diff --git a/voxygen/src/render/renderer.rs b/voxygen/src/render/renderer.rs index 9a803fa18b..c3eef719bd 100644 --- a/voxygen/src/render/renderer.rs +++ 
b/voxygen/src/render/renderer.rs @@ -475,9 +475,9 @@ impl Renderer { )?; let clouds_locals = - Self::create_consts_inner(&device, &[clouds::Locals::default()]); + Self::create_consts_inner(&device, &queue, wgpu::BufferUsage::COPY_DST, &[clouds::Locals::default()]); let postprocess_locals = - Self::create_consts_inner(&device, &[postprocess::Locals::default()]); + Self::create_consts_inner(&device, &queue, wgpu::BufferUsage::COPY_DST, &[postprocess::Locals::default()]); let locals = Locals::new( &device, @@ -488,7 +488,7 @@ impl Renderer { &views.tgt_depth, views.bloom_tgts.as_ref().map(|tgts| locals::BloomParams { locals: bloom_sizes.map(|size| { - Self::create_consts_inner(&device, &[bloom::Locals::new(size)]) + Self::create_consts_inner(&device, &queue, wgpu::BufferUsage::empty(), &[bloom::Locals::new(size)]) }), src_views: [&views.tgt_color_pp, &tgts[1], &tgts[2], &tgts[3], &tgts[4]], final_tgt_view: &tgts[0], @@ -499,9 +499,9 @@ impl Renderer { ); let quad_index_buffer_u16 = - create_quad_index_buffer_u16(&device, QUAD_INDEX_BUFFER_U16_VERT_LEN.into()); + create_quad_index_buffer_u16(&device, &queue, QUAD_INDEX_BUFFER_U16_VERT_LEN.into()); let quad_index_buffer_u32 = - create_quad_index_buffer_u32(&device, QUAD_INDEX_BUFFER_U32_START_VERT_LEN as usize); + create_quad_index_buffer_u32(&device, &queue, QUAD_INDEX_BUFFER_U32_START_VERT_LEN as usize); let mut profiler = wgpu_profiler::GpuProfiler::new(4, queue.get_timestamp_period()); other_modes.profiler_enabled &= profiler_features_enabled; profiler.enable_timer = other_modes.profiler_enabled; @@ -513,12 +513,15 @@ impl Renderer { let (maintain_tx, maintain_rx) = channel::bounded(0); let device_ = Arc::clone(&device); - std::thread::spawn(move || { + /* std::thread::spawn(move || { // Maintain each time we are requested to do so, until the renderer dies. + // Additionally, accepts CPU->GPU tasks containing updates to perform that need to lock + // the device (but not necessarily the queue?). 
This is a hopefully temporary measure + // required because wgpu as currently written cannot help itself. while let Ok(()) = maintain_rx.recv() { device_.poll(wgpu::Maintain::Poll); } - }); + }); */ #[cfg(feature = "egui-ui")] let egui_renderpass = @@ -690,7 +693,7 @@ impl Renderer { .as_ref() .map(|tgts| locals::BloomParams { locals: bloom_sizes.map(|size| { - Self::create_consts_inner(&self.device, &[bloom::Locals::new( + Self::create_consts_inner(&self.device, &self.queue, wgpu::BufferUsage::empty(), &[bloom::Locals::new( size, )]) }), @@ -813,7 +816,8 @@ impl Renderer { // Since if the channel is out of capacity, it means a maintain is already being processed // (in which case we can just catch up next frame), this is a long-winded way of saying we // can ignore the result of try_send. - let _ = self.maintain_tx.try_send(()); + // let _ = self.maintain_tx.try_send(()); + self.device.poll(wgpu::Maintain::Poll); } /// Create render target views @@ -1255,22 +1259,25 @@ impl Renderer { } /// Create a new set of constants with the provided values. - pub fn create_consts(&mut self, vals: &[T]) -> Consts { - Self::create_consts_inner(&self.device, vals) + pub fn create_consts(&mut self, usage: wgpu::BufferUsage, vals: &[T]) -> Consts { + Self::create_consts_inner(&self.device, &self.queue, usage, vals) } pub fn create_consts_inner( device: &wgpu::Device, + queue: &wgpu::Queue, + usage: wgpu::BufferUsage, vals: &[T], ) -> Consts { - Consts::new_with_data(device, vals) + Consts::new_with_data(device, queue, usage, vals) } pub fn create_consts_mapped( &mut self, + usage: wgpu::BufferUsage, len: usize, ) -> Consts { - Consts::new_mapped(&self.device, len) + Consts::new_mapped(&self.device, usage, len) } /// Update a set of constants with the provided values. @@ -1278,16 +1285,12 @@ impl Renderer { consts.update(&self.queue, vals, 0) } - /// Gets a memory mapped buffer of a set of constants. 
- pub fn get_consts_mapped<'a, T: Copy + bytemuck::Pod>(&self, consts: &'a Consts) -> /* &'a mut [T] */wgpu::BufferViewMut<'a> { - consts.get_mapped_mut(0, consts.len()) - } - - /// Unmaps a set of memory mapped constants. + /// Unmaps a set of memory mapped consts. pub fn unmap_consts(&self, consts: &Consts) { consts.unmap(&self.queue) } + pub fn update_clouds_locals(&mut self, new_val: clouds::Locals) { self.locals.clouds.update(&self.queue, &[new_val], 0) } @@ -1301,16 +1304,21 @@ impl Renderer { &mut self, vals: &[T], ) -> Result, RenderError> { - Ok(Instances::new_with_data(&self.device, vals)) + Ok(Instances::new_with_data(&self.device, &self.queue, vals)) } - /// Create a new set of instances with the provided values lazily (for use off the main + /// Create a new set of instances with the provided size lazily (for use off the main /// thread). pub fn create_instances_lazy( &mut self, - ) -> impl for<'a> Fn(&'a [T]) -> Instances + Send + Sync { + ) -> impl /*for<'a> */Fn(/* &'a [T]*/usize) -> Instances + Send + Sync { let device = Arc::clone(&self.device); - move |vals| Instances::new_with_data(&device, &vals) + move |/*vals*/len| Instances::new_mapped(&device, len)/*Instances::new_with_data(&device, &vals)*/ + } + + /// Unmaps a set of memory mapped instances. 
+ pub fn unmap_instances(&self, instances: &Instances) { + instances.unmap(&self.queue) } /// Update the expected index length to be large enough for a quad vertex bfufer with this many @@ -1351,7 +1359,7 @@ impl Renderer { if self.quad_index_buffer_u32.len() < quad_index_length { // Make sure we aren't over the max self.quad_index_buffer_u32 = - create_quad_index_buffer_u32(&self.device, vert_length); + create_quad_index_buffer_u32(&self.device, &self.queue, vert_length); } */ }, None => {}, @@ -1369,33 +1377,53 @@ impl Renderer { let vert_length = self.quad_index_buffer_u32_len.load(Ordering::Relaxed); if self.quad_index_buffer_u32.len() < vert_length { self.quad_index_buffer_u32 = - create_quad_index_buffer_u32(&self.device, vert_length); + create_quad_index_buffer_u32(&self.device, &self.queue, vert_length); } } pub fn create_sprite_verts(&mut self, mesh: Mesh) -> sprite::SpriteVerts { Self::update_index_length::(&self.quad_index_buffer_u32_len, sprite::VERT_PAGE_SIZE as usize); - sprite::create_verts_buffer(&self.device, mesh) + sprite::create_verts_buffer(&self.device, &self.queue, mesh) } /// Create a new model from the provided mesh. /// If the provided mesh is empty this returns None pub fn create_model(&mut self, mesh: &Mesh) -> Option> { Self::update_index_length::(&self.quad_index_buffer_u32_len, mesh.vertices().len()); - Model::new(&self.device, wgpu::BufferUsage::VERTEX, mesh) + Model::new(&self.device, &self.queue, wgpu::BufferUsage::VERTEX, mesh) } - /// Create a new model from the provided mesh, lazily (for use off the main thread). - /// If the provided mesh is empty this returns None - pub fn create_model_lazy(&mut self, usage: wgpu::BufferUsage) -> impl for<'a> Fn(&'a Mesh) -> Option> + Send + Sync { + /// Create a new model for a mesh with the provided length, lazily (for use off the main + /// thread). If the provided mesh is empty this returns None. The mesh is memory mapped, and + /// still needs to be unmapped before use. 
+ pub fn create_model_lazy_base(&mut self, usage: wgpu::BufferUsage) -> impl Fn(usize) -> Option> + Send + Sync { let device = Arc::clone(&self.device); let quad_index_buffer_u32_len = Arc::clone(&self.quad_index_buffer_u32_len); - move |mesh| { - Self::update_index_length::(&quad_index_buffer_u32_len, mesh.vertices().len()); - Model::new(&device, usage, mesh) + move |len| { + Self::update_index_length::(&quad_index_buffer_u32_len, len); + Model::new_mapped(&device, len, usage/*, mesh.vertices()*/) } } + /// Create a new model from the provided mesh, lazily (for use off the main + /// thread). If the provided mesh is empty this returns None. The model is memory mapped, and + /// still needs to be unmapped before use. + pub fn create_model_lazy(&mut self, usage: wgpu::BufferUsage) -> impl for<'a> Fn(&'a Mesh) -> Option> + Send + Sync { + let create_model = self.create_model_lazy_base(usage); + move |mesh| { + let len = mesh.vertices().len(); + let model = create_model(len)?; + model.get_mapped_mut(0, len) + .copy_from_slice(bytemuck::cast_slice(mesh.vertices())); + Some(model) + } + } + + /// Unmaps a memory mapped model. + pub fn unmap_model(&self, model: &Model) { + model.unmap(&self.queue); + } + /// Create a new dynamic model with the specified size. pub fn create_dynamic_model(&mut self, size: usize) -> DynamicModel { Self::update_index_length::(&self.quad_index_buffer_u32_len, size); @@ -1515,13 +1543,6 @@ impl Renderer { texture.clear(&self.queue) } - /// Replaces the destination texture with the contents of the source texture. - /// - /// The source size should at least fit within the destination texture's size. 
- pub fn replace_texture(&mut self, encoder: &mut wgpu::CommandEncoder, dest: &Texture, source: &Texture) { - dest.replace(&self.device, encoder, source); - } - /// Queue to obtain a screenshot on the next frame render pub fn create_screenshot( &mut self, @@ -1613,7 +1634,7 @@ impl Renderer { // } } -fn create_quad_index_buffer_u16(device: &wgpu::Device, vert_length: usize) -> Buffer { +fn create_quad_index_buffer_u16(device: &wgpu::Device, queue: &wgpu::Queue, vert_length: usize) -> Buffer { assert!(vert_length <= u16::MAX as usize); let indices = [0, 1, 2, 2, 1, 3] .iter() @@ -1624,10 +1645,10 @@ fn create_quad_index_buffer_u16(device: &wgpu::Device, vert_length: usize) -> Bu .map(|(i, b)| (i / 6 * 4 + b) as u16) .collect::>(); - Buffer::new(device, wgpu::BufferUsage::INDEX, &indices) + Buffer::new(device, queue, wgpu::BufferUsage::INDEX, &indices) } -fn create_quad_index_buffer_u32(device: &wgpu::Device, vert_length: usize) -> Buffer { +fn create_quad_index_buffer_u32(device: &wgpu::Device, queue: &wgpu::Queue, vert_length: usize) -> Buffer { assert!(vert_length <= u32::MAX as usize); let indices = [0, 1, 2, 2, 1, 3] .iter() @@ -1638,5 +1659,5 @@ fn create_quad_index_buffer_u32(device: &wgpu::Device, vert_length: usize) -> Bu .map(|(i, b)| (i / 6 * 4 + b) as u32) .collect::>(); - Buffer::new(device, wgpu::BufferUsage::INDEX, &indices) + Buffer::new(device, queue, wgpu::BufferUsage::INDEX, &indices) } diff --git a/voxygen/src/render/renderer/binding.rs b/voxygen/src/render/renderer/binding.rs index 7f34914bed..d041abe343 100644 --- a/voxygen/src/render/renderer/binding.rs +++ b/voxygen/src/render/renderer/binding.rs @@ -40,12 +40,12 @@ impl Renderer { } pub fn create_debug_bound_locals(&mut self, vals: &[debug::Locals]) -> debug::BoundLocals { - let locals = self.create_consts(vals); + let locals = self.create_consts(wgpu::BufferUsage::COPY_DST, vals); self.layouts.debug.bind_locals(&self.device, locals) } pub fn create_ui_bound_locals(&mut self, vals: 
&[ui::Locals]) -> ui::BoundLocals { - let locals = self.create_consts(vals); + let locals = self.create_consts(wgpu::BufferUsage::COPY_DST, vals); self.layouts.ui.bind_locals(&self.device, locals) } @@ -58,22 +58,13 @@ impl Renderer { locals: &[figure::Locals], bone_data: &[figure::BoneData], ) -> figure::BoundLocals { - let locals = self.create_consts(locals); - let bone_data = self.create_consts(bone_data); + let locals = self.create_consts(wgpu::BufferUsage::COPY_DST, locals); + let bone_data = self.create_consts(wgpu::BufferUsage::COPY_DST, bone_data); self.layouts .figure .bind_locals(&self.device, locals, bone_data) } - /* /// Create a new set of constants with the provided values, lazily (so this can be instantiated - /// from another thread). - pub fn create_consts_lazy(&mut self) -> - impl for<'a> Fn(&'a [T]) -> Consts + Send + Sync - { - let device = Arc::clone(&self.device); - move |vals| Self::create_consts_inner(&device, vals) - } */ - /// NOTE: Locals are mapped at creation, so you still have to memory map and bind them in order /// before use. 
pub fn create_terrain_bound_locals( @@ -84,14 +75,14 @@ impl Renderer { /* let device = Arc::clone(&self.device); let immutable = Arc::clone(&self.layouts.immutable); move || { - let locals = Consts::new_mapped(&device, 1); + let locals = Consts::new_mapped(&device, wgpu::BufferUsage::empty(), 1); immutable.terrain.bind_locals(&device, locals) } */ self.layouts.immutable.terrain.bind_locals(&self.device, locals/* , offset */) } pub fn create_shadow_bound_locals(&mut self, locals: &[shadow::Locals]) -> shadow::BoundLocals { - let locals = self.create_consts(locals); + let locals = self.create_consts(wgpu::BufferUsage::COPY_DST, locals); self.layouts.shadow.bind_locals(&self.device, locals) } @@ -99,7 +90,7 @@ impl Renderer { &mut self, locals: &[rain_occlusion::Locals], ) -> rain_occlusion::BoundLocals { - let locals = self.create_consts(locals); + let locals = self.create_consts(wgpu::BufferUsage::COPY_DST, locals); self.layouts .rain_occlusion .bind_locals(&self.device, locals) diff --git a/voxygen/src/render/renderer/drawer.rs b/voxygen/src/render/renderer/drawer.rs index 743c99bb29..3d173536e7 100644 --- a/voxygen/src/render/renderer/drawer.rs +++ b/voxygen/src/render/renderer/drawer.rs @@ -976,7 +976,7 @@ impl<'pass_ref, 'pass: 'pass_ref> SpriteDrawer<'pass_ref, 'pass> { pub fn draw<'data: 'pass>( &mut self, &(terrain_locals_offset, ref terrain_locals): &'data (wgpu::DynamicOffset, terrain::BoundLocals), - instances: &'data Instances, + (range, instances): (Range, &'data Instances), ) { self.render_pass .set_bind_group(3, &terrain_locals.bind_group, &[terrain_locals_offset]); @@ -986,7 +986,7 @@ impl<'pass_ref, 'pass: 'pass_ref> SpriteDrawer<'pass_ref, 'pass> { self.render_pass.draw_indexed( 0..sprite::VERT_PAGE_SIZE / 4 * 6, 0, - 0..instances.count() as u32, + range, ); } } diff --git a/voxygen/src/render/texture.rs b/voxygen/src/render/texture.rs index 65f174bb7d..85b7dfc6d8 100644 --- a/voxygen/src/render/texture.rs +++ b/voxygen/src/render/texture.rs @@ 
-193,7 +193,7 @@ impl Texture { /// Replaces this texture with the contents of another texture. /// /// The source size should at least fit within this texture's size. - pub fn replace<'a>(&self, device: &wgpu::Device, encoder: &mut wgpu::CommandEncoder, texture: &Self) { + pub fn replace<'a>(&self, encoder: &mut wgpu::CommandEncoder, texture: &Self) { // Copy image encoder.copy_texture_to_texture( wgpu::ImageCopyTexture { diff --git a/voxygen/src/scene/figure/cache.rs b/voxygen/src/scene/figure/cache.rs index d8962b8f54..5d6b67bfc4 100644 --- a/voxygen/src/scene/figure/cache.rs +++ b/voxygen/src/scene/figure/cache.rs @@ -463,8 +463,13 @@ where make_model(generate_mesh_lod_low), ]; + let (col_lights_alloc_size, finalize) = greedy.finalize(Vec2::broadcast(1)); + let mut col_lights = vec![[0; 4]; col_lights_alloc_size]; + let col_lights_size = finalize(&mut col_lights); + let col_light = (col_lights, col_lights_size); + slot_.store(Some(MeshWorkerResponse { - col_light: greedy.finalize(Vec2::broadcast(1)), + col_light, opaque, bounds: figure_bounds, vertex_range: models, diff --git a/voxygen/src/scene/mod.rs b/voxygen/src/scene/mod.rs index 5d7d66dab5..be37f451c5 100644 --- a/voxygen/src/scene/mod.rs +++ b/voxygen/src/scene/mod.rs @@ -282,9 +282,9 @@ impl Scene { let sprite_render_context = lazy_init(renderer); let data = GlobalModel { - globals: renderer.create_consts(&[Globals::default()]), - lights: renderer.create_consts(&[Light::default(); MAX_LIGHT_COUNT]), - shadows: renderer.create_consts(&[Shadow::default(); MAX_SHADOW_COUNT]), + globals: renderer.create_consts(wgpu::BufferUsage::COPY_DST, &[Globals::default()]), + lights: renderer.create_consts(wgpu::BufferUsage::COPY_DST, &[Light::default(); MAX_LIGHT_COUNT]), + shadows: renderer.create_consts(wgpu::BufferUsage::COPY_DST, &[Shadow::default(); MAX_SHADOW_COUNT]), shadow_mats: renderer.create_shadow_bound_locals(&[ShadowLocals::default()]), rain_occlusion_mats: renderer 
.create_rain_occlusion_bound_locals(&[RainOcclusionLocals::default()]), diff --git a/voxygen/src/scene/simple.rs b/voxygen/src/scene/simple.rs index c89cb35bd9..e27fff81e5 100644 --- a/voxygen/src/scene/simple.rs +++ b/voxygen/src/scene/simple.rs @@ -109,9 +109,9 @@ impl Scene { let mut col_lights = FigureColLights::new(renderer); let data = GlobalModel { - globals: renderer.create_consts(&[Globals::default()]), - lights: renderer.create_consts(&[Light::default(); 20]), - shadows: renderer.create_consts(&[Shadow::default(); 24]), + globals: renderer.create_consts(wgpu::BufferUsage::COPY_DST, &[Globals::default()]), + lights: renderer.create_consts(wgpu::BufferUsage::COPY_DST, &[Light::default(); 20]), + shadows: renderer.create_consts(wgpu::BufferUsage::COPY_DST, &[Shadow::default(); 24]), shadow_mats: renderer.create_shadow_bound_locals(&[ShadowLocals::default()]), rain_occlusion_mats: renderer .create_rain_occlusion_bound_locals(&[RainOcclusionLocals::default()]), @@ -145,9 +145,15 @@ impl Scene { // total size is bounded by 2^24 * 3 * 1.5 which is bounded by // 2^27, which fits in a u32. 
let range = 0..opaque_mesh.vertices().len() as u32; + + let (col_lights_alloc_size, finalize) = greedy.finalize(Vec2::broadcast(1)); + let mut col_light = vec![[0; 4]; col_lights_alloc_size]; + let col_lights_size = finalize(&mut col_light); + let col_light = (col_light, col_lights_size); + let model = col_lights - .create_figure(renderer, greedy.finalize(Vec2::broadcast(1)), (opaque_mesh, bounds), [range]); + .create_figure(renderer, col_light, (opaque_mesh, bounds), [range]); let mut buf = [Default::default(); anim::MAX_BONE_COUNT]; let common_params = FigureUpdateCommonParameters { entity: None, diff --git a/voxygen/src/scene/terrain.rs b/voxygen/src/scene/terrain.rs index 3115db0140..b126195db2 100644 --- a/voxygen/src/scene/terrain.rs +++ b/voxygen/src/scene/terrain.rs @@ -12,7 +12,7 @@ use crate::{ pipelines::{self, ColLights}, ColLightInfo, Consts, FirstPassDrawer, FluidVertex, GlobalModel, Instances, LodData, Mesh, Model, RenderError, Renderer, SpriteGlobalsBindGroup, SpriteInstance, SpriteVertex, SpriteVerts, - TerrainLocals, TerrainShadowDrawer, TerrainVertex, Texture, SPRITE_VERT_PAGE_SIZE, + TerrainLocals, TerrainShadowDrawer, TerrainVertex, Texture, SPRITE_LOD_LEVELS, SPRITE_VERT_PAGE_SIZE, }, }; @@ -46,7 +46,6 @@ use treeculler::{BVol, Frustum, AABB}; use vek::*; const SPRITE_SCALE: Vec3 = Vec3::new(1.0 / 11.0, 1.0 / 11.0, 1.0 / 11.0); -const SPRITE_LOD_LEVELS: usize = 5; // For rain occlusion we only need to render the closest chunks. /// How many chunks are maximally rendered for rain occlusion. @@ -91,7 +90,7 @@ pub struct TerrainChunkData { col_lights: Arc>, light_map: LightMapFn, glow_map: LightMapFn, - sprite_instances: [Instances; SPRITE_LOD_LEVELS], + sprite_instances: ([core::ops::Range; SPRITE_LOD_LEVELS], Instances), locals: (wgpu::DynamicOffset, pipelines::terrain::BoundLocals), pub blocks_of_interest: BlocksOfInterest, @@ -143,7 +142,7 @@ pub struct MeshWorkerResponseMesh { /// mesh of a chunk. 
struct MeshWorkerResponse { pos: Vec2, - sprite_instances: [Instances; SPRITE_LOD_LEVELS], + sprite_instances: ([core::ops::Range; SPRITE_LOD_LEVELS], Instances), /// If None, this update was requested without meshing. mesh: Option, started_tick: u64, @@ -259,9 +258,9 @@ fn mesh_worker/* + RectRasterableVol + ReadVol + Debug + sprite_config: &SpriteSpec, create_opaque: impl for<'a> Fn(&'a Mesh) -> Option>, create_fluid: impl for<'a> Fn(&'a Mesh) -> Option>, - create_instances: impl for<'a> Fn(&'a [SpriteInstance]) -> Instances, + create_instances: impl for<'a> Fn(/* &'a [SpriteInstance] */usize) -> Instances, /* create_locals: impl Fn() -> pipelines::terrain::BoundLocals, */ - create_texture: impl for<'a> Fn(/* wgpu::TextureDescriptor<'a>, wgpu::TextureViewDescriptor<'a>, wgpu::SamplerDescriptor<'a>*/&'a Mesh<[u8; 4]>) -> /*Texture + Send + Sync*/Option>, + create_texture: impl for<'a> Fn(/* wgpu::TextureDescriptor<'a>, wgpu::TextureViewDescriptor<'a>, wgpu::SamplerDescriptor<'a>*//*&'a Mesh<[u8; 4]>*/usize) -> /*Texture + Send + Sync*/Option>, ) -> MeshWorkerResponse { span!(_guard, "mesh_worker"); let (blocks_of_interest, sprite_kinds) = BlocksOfInterest::from_chunk(&chunk)/*default()*/; @@ -281,15 +280,16 @@ fn mesh_worker/* + RectRasterableVol + ReadVol + Debug + let (opaque_mesh, fluid_mesh, _shadow_mesh, (bounds, col_lights_info, light_map, glow_map)) = generate_mesh( &volume, + create_texture, ( range, Vec2::new(max_texture_size, max_texture_size), &blocks_of_interest, ), ); - let mut tex_ = Mesh::new(); + /* let mut tex_ = Mesh::new(); *tex_.vertices_mut_vec() = col_lights_info.0; - let tex = create_texture(&tex_); + let tex = create_texture(&tex_); */ mesh = Some(MeshWorkerResponseMesh { // TODO: Take sprite bounds into account somehow? 
z_bounds: (bounds.min.z, bounds.max.z), @@ -301,7 +301,7 @@ fn mesh_worker/* + RectRasterableVol + ReadVol + Debug + opaque_model: create_opaque(&opaque_mesh), fluid_model: create_fluid(&fluid_mesh), /* locals: create_locals(), */ - col_lights_info: (tex, col_lights_info.1), + col_lights_info/*: (tex, col_lights_info.1)*/, light_map, glow_map, }); @@ -383,7 +383,25 @@ fn mesh_worker/* + RectRasterableVol + ReadVol + Debug + } */ } - instances.map(|instances| create_instances(&instances)) + let mut start = 0; + let instance_ranges = instances.each_ref().map(|instances| { + let range = start..start + instances.len() as u32; + start = range.end; + range + }); + let sprite_instances = create_instances(instance_ranges.iter().map(|range| range.len()).sum()); + if start > 0 { + sprite_instances + .get_mapped_mut(0, sprite_instances.count()) + .array_chunks_mut::<{ core::mem::size_of::() }>() + .zip(instances.into_iter().flatten()).for_each(|(dst, src)| { + // FIXME: cast doesn't work because bytemuck::cast isn't const generic-ified + // yet, so it fails on some array lengths. + // *dst = bytemuck::cast(src); + dst.copy_from_slice(bytemuck::cast_slice(&[src])); + }); + } + (instance_ranges, sprite_instances) }, mesh, blocks_of_interest, @@ -601,7 +619,10 @@ impl SpriteRenderContext { let sprite_col_lights = { prof_span!("finalize"); - greedy.finalize(Vec2::broadcast(1)) + let (col_lights_alloc_size, finalize) = greedy.finalize(Vec2::broadcast(1)); + let mut col_lights = vec![[0; 4]; col_lights_alloc_size]; + let col_lights_size = finalize(&mut col_lights); + (col_lights, col_lights_size) }; SpriteWorkerResponse { @@ -799,7 +820,7 @@ impl/**/ Terrain { // a copy from the previous atlas, skipping the CPU->GPU upload. if let Some((old_texture, encoder)) = old_texture { // TODO: Delay submission, don't just submit immediately out of convenience! 
- renderer.replace_texture(encoder, &texture, old_texture); + texture.replace(encoder, old_texture); } else { renderer.clear_texture(&texture); } @@ -1286,7 +1307,7 @@ impl/**/ Terrain { let create_fluid = renderer.create_model_lazy(wgpu::BufferUsage::VERTEX); let create_instances = renderer.create_instances_lazy(); /* let create_locals = renderer.create_terrain_bound_locals(); */ - let create_texture = renderer./*create_texture_raw*/create_model_lazy(wgpu::BufferUsage::COPY_SRC); + let create_texture = renderer./*create_texture_raw*/create_model_lazy_base(wgpu::BufferUsage::COPY_SRC); /* cnt.fetch_add(1, Ordering::Relaxed); */ let job = move || { // Since this loads when the task actually *runs*, rather than when it's @@ -1347,8 +1368,8 @@ impl/**/ Terrain { if max_recv_count > 0 { // Construct a buffer for all the chunks we're going to process in this frame. There might // be some unused slots, which is fine. - let locals = /*Arc::new(*/renderer.create_consts_mapped(max_recv_count as usize)/*)*/; - let mut locals_buffer = renderer.get_consts_mapped(&locals); + let locals = /*Arc::new(*/renderer.create_consts_mapped(wgpu::BufferUsage::empty(), max_recv_count as usize)/*)*/; + let mut locals_buffer = locals.get_mapped_mut(0, locals.len()); let mut locals_bound = renderer.create_terrain_bound_locals(&locals/*, locals_offset */); let mut encoder = renderer.device .create_command_encoder(&wgpu::CommandEncoderDescriptor { @@ -1356,25 +1377,26 @@ impl/**/ Terrain { }); for (locals_offset, (response, locals_buffer)) in incoming_chunks.zip(locals_buffer.array_chunks_mut::<{ core::mem::size_of::() }>()).enumerate() { - match self.mesh_todo.get(&response.pos) { + let pos = response.pos; + let response_started_tick = response.started_tick; + match self.mesh_todo.get(&pos) { // It's the mesh we want, insert the newly finished model into the terrain model // data structure (convert the mesh to a model first of course). 
Some(todo) => { let started_tick = todo.started_tick.load(Ordering::Relaxed); - if response.started_tick > started_tick { + if response_started_tick > started_tick { // Chunk must have been removed, or it was spawned on an old tick. Drop - // the mesh since it's either out of date or no longer needed. + // the mesh in the background since it's either out of date or no longer + // needed. + slowjob.spawn(&"TERRAIN_DROP", move || { drop(response); }); continue; } - let sprite_instances = response.sprite_instances; - if let Some(mut mesh) = response.mesh { // Full update, insert the whole chunk. - let load_time = self .chunks - .get(&response.pos) + .get(&pos) .map(|chunk| chunk.load_time) .unwrap_or(current_time as f32); // TODO: Allocate new atlas on allocation failure. @@ -1422,6 +1444,16 @@ impl/**/ Terrain { .expect("Chunk data does not fit in a texture of maximum size.") }); + // Unmap buffers mapped on other threads (we do this here to avoid + // contention with queue submission, as both of these take the device write + // lock as of wgpu 0.8.1). + // + // FIXME: When we upgrade wgpu, reconsider all this. + renderer.unmap_instances(&response.sprite_instances.1); + mesh.opaque_model.as_ref().map(|model| renderer.unmap_model(model)); + mesh.fluid_model.as_ref().map(|model| renderer.unmap_model(model)); + renderer.unmap_model(&tex); + // NOTE: Cast is safe since the origin was a u16. 
let atlas_offs = Vec2::new( allocation.rectangle.min.x as u32, @@ -1467,7 +1499,7 @@ impl/**/ Terrain { let locals_buffer_ = /* renderer.update_mapped(&mut mesh.locals, &[*/TerrainLocals::new( Vec3::from( - response.pos.map2(VolGrid2d::::chunk_size(), |e, sz| { + pos.map2(VolGrid2d::::chunk_size(), |e, sz| { e as f32 * sz as f32 }), ), @@ -1477,7 +1509,7 @@ impl/**/ Terrain { *locals_buffer = bytemuck::cast(locals_buffer_); /* let locals = Arc::clone(&locals); */ - Self::insert_chunk(&slowjob, &mut self.chunks, &mut self.atlas, response.pos, TerrainChunkData { + Self::insert_chunk(&slowjob, &mut self.chunks, &mut self.atlas, pos, TerrainChunkData { load_time, opaque_model: mesh.opaque_model, fluid_model: mesh.fluid_model, @@ -1485,7 +1517,7 @@ impl/**/ Terrain { col_lights: Arc::clone(&self.col_lights), light_map: mesh.light_map, glow_map: mesh.glow_map, - sprite_instances, + sprite_instances: response.sprite_instances, locals: /* mesh.locals *//*renderer.create_terrain_bound_locals(&locals/*, locals_offset */)*/ ((locals_offset * core::mem::size_of::()) as wgpu::DynamicOffset, Arc::clone(&locals_bound)), visible: Visibility { @@ -1499,20 +1531,27 @@ impl/**/ Terrain { shadow_z_bounds: mesh.shadow_z_bounds, frustum_last_plane_index: 0, }); - } else if let Some(chunk) = self.chunks.get_mut(&response.pos) { + } else if let Some(chunk) = self.chunks.get_mut(&pos) { // There was an update that didn't require a remesh (probably related to // non-glowing sprites) so we just update those. - chunk.sprite_instances = sprite_instances; + chunk.sprite_instances = response.sprite_instances; chunk.blocks_of_interest = response.blocks_of_interest; + } else { + // Not sure what happened here, but we should drop the result in the + // background. 
+ slowjob.spawn(&"TERRAIN_DROP", move || { drop(response); }); } - if response.started_tick == started_tick { + if response_started_tick == started_tick { // This was the latest worker for this chunk, so we don't need to worry // about canceling any later tasks. - self.mesh_todo.remove(&response.pos); + self.mesh_todo.remove(&pos); } }, - None => {}, + // Old task, drop the response in the background. + None => { + slowjob.spawn(&"TERRAIN_DROP", move || { drop(response); }); + }, } } // Drop the memory mapping and unmap the locals. @@ -1923,7 +1962,7 @@ impl/**/ Terrain { .filter(|(_, c)| c.visible.is_visible()) .for_each(|(pos, chunk)| { // Skip chunk if it has no sprites - if chunk.sprite_instances[0].count() == 0 { + if chunk.sprite_instances.1.count() == 0 { return; } @@ -1949,7 +1988,7 @@ impl/**/ Terrain { 4 }; - sprite_drawer.draw(&chunk.locals, &chunk.sprite_instances[lod_level]); + sprite_drawer.draw(&chunk.locals, (chunk.sprite_instances.0[lod_level].clone(), &chunk.sprite_instances.1)); } }); drop(sprite_drawer);