Perform nearly all chunk allocations in the same thread that the chunk is generated in.
2024-08-30 18:12:32 +00:00 · 2022-08-04 19:00:42 -07:00 · 2022-08-04 19:00:42 -07:00 · c577c21156
commit c577c21156
parent 04bb1e32d9
5 changed files with 99 additions and 48 deletions
--- a/voxygen/src/render/buffer.rs
+++ b/voxygen/src/render/buffer.rs
@ -45,6 +45,10 @@ impl<T: Copy + Pod> DynamicBuffer<T> {
        Self(buffer)
    }

+    pub fn new_with_data(device: &wgpu::Device, usage: wgpu::BufferUsage, data: &[T]) -> Self {
+        Self(Buffer::new(device, usage | wgpu::BufferUsage::COPY_DST, data))
+    }
+
    pub fn update(&self, queue: &wgpu::Queue, vals: &[T], offset: usize) {
        if !vals.is_empty() {
            queue.write_buffer(
--- a/voxygen/src/render/instances.rs
+++ b/voxygen/src/render/instances.rs
@ -15,6 +15,14 @@ impl<T: Copy + Pod> Instances<T> {
        }
    }

+    pub fn new_with_data(device: &wgpu::Device, data: &[T]) -> Self {
+        Self {
+            // TODO: examine if we have Instances that are not updated (e.g. sprites) and if there
+            // would be any gains from separating those out
+            buf: DynamicBuffer::new_with_data(device, wgpu::BufferUsage::VERTEX, data),
+        }
+    }
+
    // TODO: count vs len naming scheme??
    pub fn count(&self) -> usize { self.buf.len() }

--- a/voxygen/src/render/renderer.rs
+++ b/voxygen/src/render/renderer.rs
@ -33,7 +33,7 @@ use super::{
 };
 use common::assets::{self, AssetExt, AssetHandle, ReloadWatcher};
 use common_base::span;
-use core::convert::TryFrom;
+use core::{sync::atomic::{AtomicUsize, Ordering}, convert::TryFrom};
 #[cfg(feature = "egui-ui")]
 use egui_wgpu_backend::wgpu::TextureFormat;
 use std::sync::Arc;
@ -46,8 +46,8 @@ use vek::*;
 // TODO: revert to u16
 pub type ColLightInfo = (Vec<[u8; 4]>, Vec2<u16>);

-const QUAD_INDEX_BUFFER_U16_START_VERT_LEN: u16 = 3000;
-const QUAD_INDEX_BUFFER_U32_START_VERT_LEN: u32 = 3000;
+const QUAD_INDEX_BUFFER_U16_VERT_LEN: u16 = u16::MAX;
+const QUAD_INDEX_BUFFER_U32_START_VERT_LEN: u32 = u16::MAX as u32;

 /// A type that stores all the layouts associated with this renderer that never
 /// change when the RenderMode is modified.
@ -160,6 +160,7 @@ pub struct Renderer {
    noise_tex: Texture,

    quad_index_buffer_u16: Buffer<u16>,
+    quad_index_buffer_u32_len: Arc<AtomicUsize>,
    quad_index_buffer_u32: Buffer<u32>,

    shaders: AssetHandle<Shaders>,
@ -494,7 +495,7 @@ impl Renderer {
        );

        let quad_index_buffer_u16 =
-            create_quad_index_buffer_u16(&device, QUAD_INDEX_BUFFER_U16_START_VERT_LEN as usize);
+            create_quad_index_buffer_u16(&device, QUAD_INDEX_BUFFER_U16_VERT_LEN.into());
        let quad_index_buffer_u32 =
            create_quad_index_buffer_u32(&device, QUAD_INDEX_BUFFER_U32_START_VERT_LEN as usize);
        let mut profiler = wgpu_profiler::GpuProfiler::new(4, queue.get_timestamp_period());
@ -526,6 +527,7 @@ impl Renderer {

            quad_index_buffer_u16,
            quad_index_buffer_u32,
+            quad_index_buffer_u32_len: Arc::new(AtomicUsize::new(QUAD_INDEX_BUFFER_U32_START_VERT_LEN as usize)),

            shaders,
            shaders_watcher,
@ -1258,15 +1260,22 @@ impl Renderer {
        &mut self,
        vals: &[T],
    ) -> Result<Instances<T>, RenderError> {
-        let mut instances = Instances::new(&self.device, vals.len());
-        instances.update(&self.queue, vals, 0);
-        Ok(instances)
+        Ok(Instances::new_with_data(&self.device, vals))
    }

-    /// Ensure that the quad index buffer is large enough for a quad vertex
-    /// buffer with this many vertices
-    pub(super) fn ensure_sufficient_index_length<V: Vertex>(
+    /// Create a new set of instances with the provided values lazily (for use off the main
+    /// thread).
+    pub fn create_instances_lazy<T: Copy + bytemuck::Pod>(
        &mut self,
+    ) -> impl for<'a> Fn(&'a [T]) -> Instances<T> + Send + Sync {
+        let device = Arc::clone(&self.device);
+        move |vals| Instances::new_with_data(&device, &vals)
+    }
+
+    /// Update the expected index length to be large enough for a quad vertex buffer with this many
+    /// vertices.
+    fn update_index_length<V: Vertex>(
+        quad_index_buffer_u32_len: &AtomicUsize,
        // Length of the vert buffer with 4 verts per quad
        vert_length: usize,
    ) {
@ -1274,55 +1283,78 @@ impl Renderer {

        match V::QUADS_INDEX {
            Some(wgpu::IndexFormat::Uint16) => {
-                // Make sure the global quad index buffer is large enough
-                if self.quad_index_buffer_u16.len() < quad_index_length {
-                    // Make sure we aren't over the max
-                    if vert_length > u16::MAX as usize {
-                        panic!(
-                            "Vertex type: {} needs to use a larger index type, length: {}",
-                            core::any::type_name::<V>(),
-                            vert_length
-                        );
-                    }
-                    self.quad_index_buffer_u16 =
-                        create_quad_index_buffer_u16(&self.device, vert_length);
-                }
+                // Index length is always sufficient.
            },
            Some(wgpu::IndexFormat::Uint32) => {
                // Make sure the global quad index buffer is large enough
+                let vert_length = quad_index_length.next_power_of_two();
+                if u32::try_from(vert_length).ok() <= Some(0) {
+                    panic!(
+                        "More than u32::MAX({}) verts({}) for type({}) using an index buffer!",
+                        u32::MAX,
+                        vert_length,
+                        core::any::type_name::<V>()
+                    );
+                }
+
+                // NOTE: This operation is monotonic, so Relaxed is sufficient.
+                quad_index_buffer_u32_len.fetch_update(
+                    Ordering::Relaxed,
+                    Ordering::Relaxed,
+                    |old_len| (old_len < quad_index_length).then_some(vert_length),
+                );
+                /* let old_len = quad_index_buffer_u32_len.load();
                if self.quad_index_buffer_u32.len() < quad_index_length {
                    // Make sure we aren't over the max
-                    if vert_length > u32::MAX as usize {
-                        panic!(
-                            "More than u32::MAX({}) verts({}) for type({}) using an index buffer!",
-                            u32::MAX,
-                            vert_length,
-                            core::any::type_name::<V>()
-                        );
-                    }
                    self.quad_index_buffer_u32 =
                        create_quad_index_buffer_u32(&self.device, vert_length);
-                }
+                } */
            },
            None => {},
        }
    }

+    /// Ensure that the quad index buffer is large enough for all quad vertices that we might
+    /// render.  Should be called before using the index buffers.  Only applies to rendering vertex
+    /// buffers whose creation synchronizes with the call to this function in some other way.
+    pub(super) fn ensure_sufficient_index_length(&mut self) {
+        // Make sure the global quad index buffer is large enough
+        //
+        // NOTE: Relaxed ordering is fine due to monotonicity, provided that we synchronize any
+        // rendered buffers with this function call in some other way.
+        let vert_length = self.quad_index_buffer_u32_len.load(Ordering::Relaxed);
+        if self.quad_index_buffer_u32.len() < vert_length {
+            self.quad_index_buffer_u32 =
+                create_quad_index_buffer_u32(&self.device, vert_length);
+        }
+    }
+
    pub fn create_sprite_verts(&mut self, mesh: Mesh<sprite::Vertex>) -> sprite::SpriteVerts {
-        self.ensure_sufficient_index_length::<sprite::Vertex>(sprite::VERT_PAGE_SIZE as usize);
+        Self::update_index_length::<sprite::Vertex>(&self.quad_index_buffer_u32_len, sprite::VERT_PAGE_SIZE as usize);
        sprite::create_verts_buffer(&self.device, mesh)
    }

    /// Create a new model from the provided mesh.
    /// If the provided mesh is empty this returns None
    pub fn create_model<V: Vertex>(&mut self, mesh: &Mesh<V>) -> Option<Model<V>> {
-        self.ensure_sufficient_index_length::<V>(mesh.vertices().len());
+        Self::update_index_length::<V>(&self.quad_index_buffer_u32_len, mesh.vertices().len());
        Model::new(&self.device, mesh)
    }

+    /// Create a new model from the provided mesh, lazily (for use off the main thread).
+    /// If the provided mesh is empty this returns None
+    pub fn create_model_lazy<V: Vertex>(&mut self) -> impl for<'a> Fn(&'a Mesh<V>) -> Option<Model<V>> + Send + Sync {
+        let device = Arc::clone(&self.device);
+        let quad_index_buffer_u32_len = Arc::clone(&self.quad_index_buffer_u32_len);
+        move |mesh| {
+            Self::update_index_length::<V>(&quad_index_buffer_u32_len, mesh.vertices().len());
+            Model::new(&device, mesh)
+        }
+    }
+
    /// Create a new dynamic model with the specified size.
    pub fn create_dynamic_model<V: Vertex>(&mut self, size: usize) -> DynamicModel<V> {
-        self.ensure_sufficient_index_length::<V>(size);
+        Self::update_index_length::<V>(&self.quad_index_buffer_u32_len, size);
        DynamicModel::new(&self.device, size)
    }

--- a/voxygen/src/render/renderer/drawer.rs
+++ b/voxygen/src/render/renderer/drawer.rs
@ -87,6 +87,8 @@ impl<'frame> Drawer<'frame> {
        swap_tex: wgpu::SwapChainTexture,
        globals: &'frame GlobalsBindGroup,
    ) -> Self {
+        renderer.ensure_sufficient_index_length();
+
        let taking_screenshot = renderer.take_screenshot.take().map(|screenshot_fn| {
            super::screenshot::TakeScreenshot::new(
                &renderer.device,
--- a/voxygen/src/scene/terrain.rs
+++ b/voxygen/src/scene/terrain.rs
@ -115,8 +115,8 @@ struct ChunkMeshState {
 pub struct MeshWorkerResponseMesh {
    z_bounds: (f32, f32),
    shadow_z_bounds: (f32, f32),
-    opaque_mesh: Mesh<TerrainVertex>,
-    fluid_mesh: Mesh<FluidVertex>,
+    opaque_model: Option<Model<TerrainVertex>>,
+    fluid_model: Option<Model<FluidVertex>>,
    col_lights_info: ColLightInfo,
    light_map: LightMapFn,
    glow_map: LightMapFn,
@ -126,7 +126,7 @@ pub struct MeshWorkerResponseMesh {
 /// mesh of a chunk.
 struct MeshWorkerResponse {
    pos: Vec2<i32>,
-    sprite_instances: [Vec<SpriteInstance>; SPRITE_LOD_LEVELS],
+    sprite_instances: [Instances<SpriteInstance>; SPRITE_LOD_LEVELS],
    /// If None, this update was requested without meshing.
    mesh: Option<MeshWorkerResponseMesh>,
    started_tick: u64,
@ -238,6 +238,9 @@ fn mesh_worker<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug + '
    range: Aabb<i32>,
    sprite_data: &HashMap<(SpriteKind, usize), [SpriteData; SPRITE_LOD_LEVELS]>,
    sprite_config: &SpriteSpec,
+    create_opaque: impl for<'a> Fn(&'a Mesh<TerrainVertex>) -> Option<Model<TerrainVertex>> + Send + Sync,
+    create_fluid: impl for<'a> Fn(&'a Mesh<FluidVertex>) -> Option<Model<FluidVertex>> + Send + Sync,
+    create_instances: impl for<'a> Fn(&'a [SpriteInstance]) -> Instances<SpriteInstance> + Send + Sync,
 ) -> MeshWorkerResponse {
    span!(_guard, "mesh_worker");
    let (blocks_of_interest, sprite_kinds) = BlocksOfInterest::from_chunk(&chunk)/*default()*/;
@ -271,8 +274,8 @@ fn mesh_worker<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug + '
            // we can ignore such cases for the purposes of determining a shadow bounding box (but
            // not visibility, unfortunately).
            shadow_z_bounds: ((chunk.get_min_z() as f32).max(bounds.min.z), (chunk.get_max_z() as f32).min(bounds.max.z)),
-            opaque_mesh,
-            fluid_mesh,
+            opaque_model: create_opaque(&opaque_mesh),
+            fluid_model: create_fluid(&fluid_mesh),
            col_lights_info,
            light_map,
            glow_map,
@ -355,7 +358,7 @@ fn mesh_worker<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug + '
            } */
            }

-            instances
+            instances.map(|instances| create_instances(&instances))
        },
        mesh,
        blocks_of_interest,
@ -1201,6 +1204,9 @@ impl<V: RectRasterableVol> Terrain<V> {
            let sprite_data = Arc::clone(&self.sprite_data);
            let sprite_config = Arc::clone(&self.sprite_config);
            let cnt = Arc::clone(&self.mesh_todos_active);
+            let create_opaque = renderer.create_model_lazy();
+            let create_fluid = renderer.create_model_lazy();
+            let create_instances = renderer.create_instances_lazy();
            cnt.fetch_add(1, Ordering::Relaxed);
            scene_data
                .state
@ -1218,6 +1224,9 @@ impl<V: RectRasterableVol> Terrain<V> {
                        aabb,
                        &sprite_data,
                        &sprite_config,
+                        create_opaque,
+                        create_fluid,
+                        create_instances,
                    ));
                    cnt.fetch_sub(1, Ordering::Relaxed);
                });
@ -1244,11 +1253,7 @@ impl<V: RectRasterableVol> Terrain<V> {
                Some(todo) if response.started_tick <= todo.started_tick => {
                    let started_tick = todo.started_tick;

-                    let sprite_instances = response.sprite_instances.map(|instances| {
-                        renderer
-                            .create_instances(&instances)
-                            .expect("Failed to upload chunk sprite instances to the GPU!")
-                    });
+                    let sprite_instances = response.sprite_instances;

                    if let Some(mesh) = response.mesh {
                        // Full update, insert the whole chunk.
@ -1314,8 +1319,8 @@ impl<V: RectRasterableVol> Terrain<V> {

                        self.insert_chunk(response.pos, TerrainChunkData {
                            load_time,
-                            opaque_model: renderer.create_model(&mesh.opaque_mesh),
-                            fluid_model: renderer.create_model(&mesh.fluid_mesh),
+                            opaque_model: mesh.opaque_model,
+                            fluid_model: mesh.fluid_model,
                            col_lights_alloc: Some(allocation.id),
                            col_lights: Arc::clone(&self.col_lights),
                            light_map: mesh.light_map,