Remove more main thread bottlenecks.

2025-07-25 12:52:26 +00:00 · 2022-08-15 19:01:43 -07:00
parent 39db97ed03
commit 54847c726b
25 changed files with 360 additions and 152 deletions
--- a/client/src/lib.rs
+++ b/client/src/lib.rs
@ -398,6 +398,8 @@ impl Client {
                let mut state = State::client();
                // Client-only components
                state.ecs_mut().register::<comp::Last<CharacterState>>();
+                state.ecs_mut().write_resource::<SlowJobPool>()
+                    .configure(&"TERRAIN_DROP", |_n| 1);
                /* state.ecs_mut().write_resource::<SlowJobPool>()
                    .configure("TERRAIN_DESERIALIZING", |n| n / 2); */
                let entity = state.ecs_mut().apply_entity_package(entity_package);
@ -1835,9 +1837,14 @@ impl Client {
                    chunks_to_remove.push(key);
                }
            });
+            // TODO: Parallelize?
+            let slowjob = self.state.slow_job_pool();
            for key in chunks_to_remove {
-                self.state.remove_chunk(key);
+                let chunk = self.state.remove_chunk(key);
+                // Drop chunk in a background thread.
+                slowjob.spawn(&"TERRAIN_DROP", move || { drop(chunk); });
            }
+            drop(slowjob);

            let mut current_tick_send_chunk_requests = 0;
            // Request chunks from the server.
--- a/common/state/src/state.rs
+++ b/common/state/src/state.rs
@ -441,18 +441,18 @@ impl State {

    /// Remove the chunk with the given key from this state's terrain, if it
    /// exists.
-    pub fn remove_chunk(&mut self, key: Vec2<i32>) {
-        if self
+    pub fn remove_chunk(&self, key: Vec2<i32>) -> Option<Arc<TerrainChunk>> {
+        self
            .ecs
            .write_resource::<TerrainGrid>()
            .remove(key)
-            .is_some()
-        {
-            self.ecs
-                .write_resource::<TerrainChanges>()
-                .removed_chunks
-                .insert(key);
-        }
+            .map(|chunk| {
+                self.ecs
+                    .write_resource::<TerrainChanges>()
+                    .removed_chunks
+                    .insert(key);
+                chunk
+            })
    }

    // Run RegionMap tick to update entity region occupancy
--- a/server/src/lib.rs
+++ b/server/src/lib.rs
@ -191,7 +191,7 @@ impl BattleModeBuffer {
 }

 pub struct ChunkRequest {
-    entity: EcsEntity,
+    entity: Option<EcsEntity>,
    key: Vec2<i32>,
 }

--- a/server/src/sys/msg/mod.rs
+++ b/server/src/sys/msg/mod.rs
@ -21,7 +21,13 @@ pub fn add_server_systems(dispatch_builder: &mut DispatcherBuilder) {
    dispatch::<in_game::Sys>(dispatch_builder, &[]);
    dispatch::<ping::Sys>(dispatch_builder, &[&general::Sys::sys_name()]);
    dispatch::<register::Sys>(dispatch_builder, &[]);
-    dispatch::<terrain::Sys>(dispatch_builder, &[]);
+    // Unfortunately, running terrain::Sys after in_game::Sys is currently desirable: otherwise we
+    // can miss chunk requests the first time around because they are not within the view distance
+    // circle, requiring the client to time out before retrieving them again.
+    //
+    // This can also happen due to in-game commands like /site.  Unfortunately, that case is a lot
+    // harder to fix, because in-game commands are not even processed in a system.
+    dispatch::<terrain::Sys>(dispatch_builder, &[&in_game::Sys::sys_name()]);
    dispatch::<pets::Sys>(dispatch_builder, &[]);
    dispatch::<loot::Sys>(dispatch_builder, &[]);
 }
--- a/server/src/sys/msg/terrain.rs
+++ b/server/src/sys/msg/terrain.rs
@ -92,7 +92,7 @@ impl<'a> System<'a> for Sys {
                                        });
                                    } else {
                                        network_metrics.chunks_generation_triggered.inc();
-                                        chunk_requests.push(ChunkRequest { entity, key });
+                                        chunk_requests.push(ChunkRequest { entity: Some(entity), key });
                                    }
                                } else {
                                    network_metrics.chunks_request_dropped.inc();
@ -133,8 +133,8 @@ impl<'a> System<'a> for Sys {
                                // TODO: @zesterer do we want to be sending these chunk to the
                                // client even if they aren't
                                // requested? If we don't we could replace the
-                                // entity here with Option<Entity> and pass in None.
-                                chunk_requests.push(ChunkRequest { entity, key });
+                                // entity here with None.
+                                chunk_requests.push(ChunkRequest { entity: None, key });
                            }
                        }
                    }
--- a/server/src/sys/terrain.rs
+++ b/server/src/sys/terrain.rs
@ -9,6 +9,7 @@ use crate::{
    chunk_generator::ChunkGenerator,
    chunk_serialize::ChunkSendEntry,
    client::Client,
+    // metrics::NetworkRequestMetrics,
    presence::{Presence, RepositionOnChunkLoad},
    rtsim::RtSim,
    settings::Settings,
@ -24,7 +25,8 @@ use common::{
    lottery::LootSpec,
    resources::{Time, TimeOfDay},
    slowjob::SlowJobPool,
-    terrain::TerrainGrid,
+    terrain::{/* TerrainChunkSize, */TerrainGrid},
+    vol::RectVolSize,
    SkillSetBuilder,
 };

@ -64,6 +66,7 @@ impl<'a> System<'a> for Sys {
        ReadExpect<'a, IndexOwned>,
        ReadExpect<'a, Arc<World>>,
        ReadExpect<'a, EventBus<ChunkSendEntry>>,
+        // ReadExpect<'a, NetworkRequestMetrics>,
        WriteExpect<'a, ChunkGenerator>,
        WriteExpect<'a, TerrainGrid>,
        Write<'a, TerrainChanges>,
@ -97,6 +100,7 @@ impl<'a> System<'a> for Sys {
            index,
            world,
            chunk_send_bus,
+            // network_metrics,
            mut chunk_generator,
            mut terrain,
            mut terrain_changes,
@ -114,6 +118,7 @@ impl<'a> System<'a> for Sys {
        ): Self::SystemData,
    ) {
        let mut server_emitter = server_event_bus.emitter();
+        // let mut chunk_send_emitter = chunk_send_bus.emitter();

        // Generate requested chunks
        //
@ -121,8 +126,35 @@ impl<'a> System<'a> for Sys {
        // don't create duplicate work for chunks that just finished but are not
        // yet added to the terrain.
        chunk_requests.drain(..).for_each(|request| {
+            /* if let Some(entity) = request.entity {
+                let in_vd = if let Some((pos, presence)) = positions.get(entity).zip(presences.get(entity)) {
+                    pos.0.xy().map(|e| e as f64).distance_squared(
+                        request.key.map(|e| e as f64 + 0.5)
+                            * TerrainChunkSize::RECT_SIZE.map(|e| e as f64),
+                    ) < ((presence.view_distance as f64 - 1.0
+                        + 2.5 * 2.0_f64.sqrt())
+                        * TerrainChunkSize::RECT_SIZE.x as f64)
+                        .powi(2)
+                } else {
+                    true
+                };
+                if in_vd {
+                    if terrain.get_key_arc(request.key).is_some() {
+                        network_metrics.chunks_served_from_memory.inc();
+                        chunk_send_emitter.emit(ChunkSendEntry {
+                            chunk_key: request.key,
+                            entity,
+                        });
+                        return;
+                    }
+                } else {
+                    network_metrics.chunks_request_dropped.inc();
+                    return;
+                }
+            }
+            network_metrics.chunks_request_dropped.inc(); */
            chunk_generator.generate_chunk(
-                Some(request.entity),
+                request.entity,
                request.key,
                &slow_jobs,
                Arc::clone(&world),
--- a/voxygen/src/hud/mod.rs
+++ b/voxygen/src/hud/mod.rs
@ -1242,7 +1242,7 @@ impl Hud {
    ) -> Vec<Event> {
        span!(_guard, "update_layout", "Hud::update_layout");
        let mut events = core::mem::take(&mut self.events);
-        if global_state.settings.interface.map_show_voxel_map {
+        if global_state.settings.interface.map_show_voxel_map && global_state.settings.interface.minimap_show {
            self.voxel_minimap.maintain(client, &mut self.ui);
        }
        let (ref mut ui_widgets, ref mut item_tooltip_manager, ref mut tooltip_manager) =
--- a/voxygen/src/lib.rs
+++ b/voxygen/src/lib.rs
@ -155,6 +155,11 @@ pub trait PlayState {
    /// Determines whether the play state should have an enforced FPS cap
    fn capped_fps(&self) -> bool;

+    /// Some scenes may have prepared command buffers they need to add directly to the queue.
+    fn pre_commands(&mut self) -> Vec<wgpu::CommandBuffer> {
+        vec![]
+    }
+
    fn globals_bind_group(&self) -> &GlobalsBindGroup;

    /// Draw the play state.
--- a/voxygen/src/mesh/greedy.rs
+++ b/voxygen/src/mesh/greedy.rs
@ -421,9 +421,12 @@ impl<'a, Allocator: AtlasAllocator> GreedyMesh<'a, Allocator> {
    /// potentially use a single staged upload to the GPU.
    ///
    /// Returns the ColLightsInfo corresponding to the constructed atlas.
-    pub fn finalize(self) -> ColLightInfo {
+    pub fn finalize(self, alignment: Vec2<u16>) -> ColLightInfo {
        span!(_guard, "finalize", "GreedyMesh::finalize");
-        let cur_size = self.col_lights_size;
+        let mut cur_size = self.col_lights_size;
+        // Round up to the next multiple of the alignment (any nonzero value, not just powers of 2)
+        cur_size.x = (cur_size.x + alignment.x - 1) / alignment.x * alignment.x;
+        cur_size.y = (cur_size.y + alignment.y - 1) / alignment.y * alignment.y;
        let col_lights = vec![
            TerrainVertex::make_col_light(254, 0, Rgb::broadcast(254), true);
            cur_size.x as usize * cur_size.y as usize
--- a/voxygen/src/mesh/terrain.rs
+++ b/voxygen/src/mesh/terrain.rs
@ -996,7 +996,10 @@ pub fn generate_mesh<'a/*, V: RectRasterableVol<Vox = Block> + ReadVol + Debug +
        min: min_bounds,
        max: max_bounds + min_bounds,
    };
-    let (col_lights, col_lights_size) = greedy.finalize();
+    // WGPU requires this alignment.
+    let (col_lights, col_lights_size) = greedy.finalize(
+        Vec2::new((wgpu::COPY_BYTES_PER_ROW_ALIGNMENT / 4) as u16, 1),
+    );

    (
        opaque_mesh,
--- a/voxygen/src/render/buffer.rs
+++ b/voxygen/src/render/buffer.rs
@ -76,8 +76,25 @@ impl<T: Copy + Pod> DynamicBuffer<T> {
        }
    }

-    /// Update the GPU-side value represented by this constant handle, if it was previously memory
-    /// mapped, and then unmaps it.
+    /// Get the GPU-side mapped slice represented by this buffer handle, if it was previously
+    /// memory mapped.
+    ///
+    /// NOTE: Will panic if the buffer was not explicitly mapped before this (without being
+    /// unmapped), either directly or via [Buffer::new_mapped].
+    pub fn get_mapped_mut(&self, offset: usize, len: usize) -> wgpu::BufferViewMut<'_> {
+        /* if !vals.is_empty() {
+            let contents = bytemuck::cast_slice(vals); */
+
+            let size_ty = std::mem::size_of::<T>() as u64;
+            let offset = offset as u64 * size_ty;
+            let size = /*vals.len()*/len as u64 * size_ty;
+            /* bytemuck::cast_slice_mut(&mut */self.buf.slice(offset..offset + size).get_mapped_range_mut()/* ) */
+                /* .copy_from_slice(contents);
+        } */
+    }
+
+    /// Unmaps the GPU-side handle represented by this buffer handle, if it was previously
+    /// memory-mapped.
    ///
    /// NOTE: Will panic if the buffer was not explicitly mapped before this (without being
    /// unmapped), either directly or via [Buffer::new_mapped].
@ -85,8 +102,8 @@ impl<T: Copy + Pod> DynamicBuffer<T> {
    /// NOTE: Queue is not *explicitly* used here, but it is implicitly used during the unmap
    /// (within wgpu internals) and requires acquiring a lock on it, so it's left in the API to
    /// deter people from using it when the queue isn't available.
-    pub fn update_mapped(&mut self, _queue: &wgpu::Queue, vals: &[T], offset: usize) {
-        if !vals.is_empty() {
+    pub fn unmap(&self, _queue: &wgpu::Queue/* , vals: &[T], offset: usize */) {
+        /* if !vals.is_empty() {
            let contents = bytemuck::cast_slice(vals);

            let size_ty = std::mem::size_of::<T>() as u64;
@ -95,7 +112,7 @@ impl<T: Copy + Pod> DynamicBuffer<T> {
            self.buf.slice(offset..offset + size)
                .get_mapped_range_mut()
                .copy_from_slice(contents);
-        }
+        } */
        self.buf.unmap();
    }
 }
--- a/voxygen/src/render/consts.rs
+++ b/voxygen/src/render/consts.rs
@ -39,11 +39,19 @@ impl<T: Copy + Pod> Consts<T> {
        self.buf.update(queue, vals, offset)
    }

-    /// Update the GPU-side value represented by this constant handle, if it was previously memory
-    /// mapped, and then immediately unmaps it.
-    pub fn update_mapped(&mut self, queue: &wgpu::Queue, vals: &[T], offset: usize) {
-        self.buf.update_mapped(queue, vals, offset)
+    /// Get the GPU-side mapped slice represented by this constant handle, if it was previously
+    /// memory mapped.
+    pub fn get_mapped_mut(&self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferViewMut<'_> {
+        self.buf.get_mapped_mut(offset, len)
+    }
+
+    /// Unmaps the GPU-side handle represented by this constant handle, if it was previously
+    /// memory-mapped.
+    pub fn unmap(&self, queue: &wgpu::Queue) {
+        self.buf.unmap(queue);
    }

    pub fn buf(&self) -> &wgpu::Buffer { &self.buf.buf }
+
+    pub fn len(&self) -> usize { self.buf.len() }
 }
--- a/voxygen/src/render/mod.rs
+++ b/voxygen/src/render/mod.rs
@ -66,6 +66,11 @@ pub trait Vertex: Clone + bytemuck::Pod {
    const QUADS_INDEX: Option<wgpu::IndexFormat>;
 }

+impl Vertex for [u8; 4] {
+    const QUADS_INDEX: Option<wgpu::IndexFormat> = None;
+    const STRIDE: wgpu::BufferAddress = core::mem::size_of::<Self>() as wgpu::BufferAddress;
+}
+
 use serde::{Deserialize, Serialize};
 /// Anti-aliasing modes
 #[derive(PartialEq, Clone, Copy, Debug, Serialize, Deserialize)]
--- a/voxygen/src/render/model.rs
+++ b/voxygen/src/render/model.rs
@ -30,13 +30,13 @@ pub struct Model<V: Vertex> {

 impl<V: Vertex> Model<V> {
    /// Returns None if the provided mesh is empty
-    pub fn new(device: &wgpu::Device, mesh: &Mesh<V>) -> Option<Self> {
+    pub fn new(device: &wgpu::Device, usage: wgpu::BufferUsage, mesh: &Mesh<V>) -> Option<Self> {
        if mesh.vertices().is_empty() {
            return None;
        }

        Some(Self {
-            vbuf: Buffer::new(device, wgpu::BufferUsage::VERTEX, mesh.vertices()),
+            vbuf: Buffer::new(device, /*wgpu::BufferUsage::VERTEX*/usage, mesh.vertices()),
        })
    }

@ -50,7 +50,7 @@ impl<V: Vertex> Model<V> {
        }
    }

-    pub(super) fn buf(&self) -> &wgpu::Buffer { &self.vbuf.buf }
+    pub fn buf(&self) -> &wgpu::Buffer { &self.vbuf.buf }

    #[allow(clippy::len_without_is_empty)]
    pub fn len(&self) -> usize { self.vbuf.len() }
--- a/voxygen/src/render/pipelines/terrain.rs
+++ b/voxygen/src/render/pipelines/terrain.rs
@ -1,6 +1,6 @@
 use super::super::{AaMode, Bound, Consts, GlobalsLayouts, Vertex as VertexTrait};
 use bytemuck::{Pod, Zeroable};
-use std::mem;
+use std::{mem, sync::Arc};
 use vek::*;

 #[repr(C)]
@ -136,6 +136,11 @@ impl VertexTrait for Vertex {
    const STRIDE: wgpu::BufferAddress = mem::size_of::<Self>() as wgpu::BufferAddress;
 }

+/// Number of u64s of padding needed to bring Locals up to wgpu::BIND_BUFFER_ALIGNMENT bytes, as
+/// required by the wgpu spec (4 is just the number of u64s we currently have in Locals; we could
+/// replace Locals with LocalsInner or something if we wanted to make this more robust).
+const PADDING_LEN: usize = wgpu::BIND_BUFFER_ALIGNMENT as usize / mem::size_of::<u64>() - 4;
+
 #[repr(C)]
 #[derive(Copy, Clone, Debug, Zeroable, Pod)]
 // TODO: new function and private fields??
@ -143,6 +148,7 @@ pub struct Locals {
    model_offs: [f32; 3],
    load_time: f32,
    atlas_offs: [i32; 4],
+    padding: [u64; PADDING_LEN],
 }

 impl Locals {
@ -151,6 +157,7 @@ impl Locals {
            model_offs: model_offs.into_array(),
            load_time,
            atlas_offs: Vec4::new(atlas_offs.x as i32, atlas_offs.y as i32, 0, 0).into_array(),
+            .. Self::default()
        }
    }

@ -159,11 +166,12 @@ impl Locals {
            model_offs: [0.0; 3],
            load_time: 0.0,
            atlas_offs: [0; 4],
+            padding: [0; PADDING_LEN],
        }
    }
 }

-pub type BoundLocals = Bound<Consts<Locals>>;
+pub type BoundLocals = Bound<()>;

 pub struct TerrainLayout {
    pub locals: wgpu::BindGroupLayout,
@ -191,19 +199,23 @@ impl TerrainLayout {
        }
    }

-    pub fn bind_locals(&self, device: &wgpu::Device, locals: Consts<Locals>) -> BoundLocals {
+    pub fn bind_locals(&self, device: &wgpu::Device, locals: &Consts<Locals>, offset: usize) -> BoundLocals {
        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: None,
            layout: &self.locals,
            entries: &[wgpu::BindGroupEntry {
                binding: 0,
-                resource: locals.buf().as_entire_binding(),
+                resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
+                    buffer: locals.buf(),
+                    offset: (offset * mem::size_of::<Locals>()) as wgpu::BufferAddress,
+                    size: wgpu::BufferSize::new(mem::size_of::<Locals>() as u64),
+                })
            }],
        });

        BoundLocals {
            bind_group,
-            with: locals,
+            with: /*locals*/(),
        }
    }
 }
--- a/voxygen/src/render/renderer.rs
+++ b/voxygen/src/render/renderer.rs
@ -139,8 +139,8 @@ enum State {
 /// GPU, along with pipeline state objects (PSOs) needed to renderer different
 /// kinds of models to the screen.
 pub struct Renderer {
-    device: Arc<wgpu::Device>,
-    queue: wgpu::Queue,
+    pub(crate) device: Arc<wgpu::Device>,
+    pub(crate) queue: wgpu::Queue,
    surface: wgpu::Surface,
    swap_chain: wgpu::SwapChain,
    sc_desc: wgpu::SwapChainDescriptor,
@ -998,6 +998,7 @@ impl Renderer {
    /// be returned
    pub fn start_recording_frame<'a>(
        &'a mut self,
+        pre_commands: Vec<wgpu::CommandBuffer>,
        globals: &'a GlobalsBindGroup,
    ) -> Result<Option<drawer::Drawer<'a>>, RenderError> {
        span!(
@ -1214,7 +1215,7 @@ impl Renderer {
                label: Some("A render encoder"),
            });

-        Ok(Some(drawer::Drawer::new(encoder, self, tex, globals)))
+        Ok(Some(drawer::Drawer::new(encoder, self, tex, pre_commands, globals)))
    }

    /// Recreate the pipelines
@ -1265,14 +1266,26 @@ impl Renderer {
        Consts::new_with_data(device, vals)
    }

+    pub fn create_consts_mapped<T: Copy + bytemuck::Pod>(
+        &mut self,
+        len: usize,
+    ) -> Consts<T> {
+        Consts::new_mapped(&self.device, len)
+    }
+
    /// Update a set of constants with the provided values.
    pub fn update_consts<T: Copy + bytemuck::Pod>(&self, consts: &mut Consts<T>, vals: &[T]) {
        consts.update(&self.queue, vals, 0)
    }

-    /// Update a set of memory mapped constants with the provided values.
-    pub fn update_mapped<T: Copy + bytemuck::Pod>(&self, consts: &mut Consts<T>, vals: &[T]) {
-        consts.update_mapped(&self.queue, vals, 0)
+    /// Gets a memory mapped buffer of a set of constants.
+    pub fn get_consts_mapped<'a, T: Copy + bytemuck::Pod>(&self, consts: &'a Consts<T>) -> /* &'a mut [T] */wgpu::BufferViewMut<'a> {
+        consts.get_mapped_mut(0, consts.len())
+    }
+
+    /// Unmaps a set of memory mapped constants.
+    pub fn unmap_consts<T: Copy + bytemuck::Pod>(&self, consts: &Consts<T>) {
+        consts.unmap(&self.queue)
    }

    pub fn update_clouds_locals(&mut self, new_val: clouds::Locals) {
@ -1369,17 +1382,17 @@ impl Renderer {
    /// If the provided mesh is empty this returns None
    pub fn create_model<V: Vertex>(&mut self, mesh: &Mesh<V>) -> Option<Model<V>> {
        Self::update_index_length::<V>(&self.quad_index_buffer_u32_len, mesh.vertices().len());
-        Model::new(&self.device, mesh)
+        Model::new(&self.device, wgpu::BufferUsage::VERTEX, mesh)
    }

    /// Create a new model from the provided mesh, lazily (for use off the main thread).
    /// If the provided mesh is empty this returns None
-    pub fn create_model_lazy<V: Vertex>(&mut self) -> impl for<'a> Fn(&'a Mesh<V>) -> Option<Model<V>> + Send + Sync {
+    pub fn create_model_lazy<V: Vertex>(&mut self, usage: wgpu::BufferUsage) -> impl for<'a> Fn(&'a Mesh<V>) -> Option<Model<V>> + Send + Sync {
        let device = Arc::clone(&self.device);
        let quad_index_buffer_u32_len = Arc::clone(&self.quad_index_buffer_u32_len);
        move |mesh| {
            Self::update_index_length::<V>(&quad_index_buffer_u32_len, mesh.vertices().len());
-            Model::new(&device, mesh)
+            Model::new(&device, usage, mesh)
        }
    }

@ -1444,14 +1457,11 @@ impl Renderer {
    ///
    /// NOTE: This is done lazily--the returned function must be invoked to actually create the
    /// texture.  This allows creating the texture on another thread.
-    pub fn create_texture_raw<'a>(
+    pub fn create_texture_raw(
        &mut self,
-        texture_info: wgpu::TextureDescriptor<'a>,
-        view_info: wgpu::TextureViewDescriptor<'a>,
-        sampler_info: wgpu::SamplerDescriptor<'a>,
-    ) -> impl FnOnce() -> Texture + Send + Sync + 'a {
+    ) -> impl for<'a> Fn(wgpu::TextureDescriptor<'a>, wgpu::TextureViewDescriptor<'a>, wgpu::SamplerDescriptor<'a>) -> Texture + Send + Sync {
        let device = Arc::clone(&self.device);
-        move || {
+        move |texture_info, view_info, sampler_info| {
            let texture = Texture::new_raw(&device, &texture_info, &view_info, &sampler_info);
            texture
        }
@ -1508,8 +1518,8 @@ impl Renderer {
    /// Replaces the destination texture with the contents of the source texture.
    ///
    /// The source size should at least fit within the destination texture's size.
-    pub fn replace_texture(&mut self, dest: &Texture, source: &Texture) {
-        dest.replace(&self.device, &self.queue, source);
+    pub fn replace_texture(&mut self, encoder: &mut wgpu::CommandEncoder, dest: &Texture, source: &Texture) {
+        dest.replace(&self.device, encoder, source);
    }

    /// Queue to obtain a screenshot on the next frame render
--- a/voxygen/src/render/renderer/binding.rs
+++ b/voxygen/src/render/renderer/binding.rs
@ -78,13 +78,16 @@ impl Renderer {
    /// before use.
    pub fn create_terrain_bound_locals(
        &mut self,
-    ) -> /*for<'a> Fn(&'a [terrain::Locals]) -> terrain::BoundLocals + Send + Sync*/impl Fn() -> terrain::BoundLocals + Send + Sync {
-        let device = Arc::clone(&self.device);
+        locals: /*Arc<*/&Consts<terrain::Locals>/*>*/,
+        offset: usize,
+    ) -> /*for<'a> Fn(&'a [terrain::Locals]) -> terrain::BoundLocals + Send + Sync*//* impl Fn() -> terrain::BoundLocals + Send + Sync */terrain::BoundLocals {
+        /* let device = Arc::clone(&self.device);
        let immutable = Arc::clone(&self.layouts.immutable);
        move || {
            let locals = Consts::new_mapped(&device, 1);
            immutable.terrain.bind_locals(&device, locals)
-        }
+        } */
+        self.layouts.immutable.terrain.bind_locals(&self.device, locals, offset)
    }

    pub fn create_shadow_bound_locals(&mut self, locals: &[shadow::Locals]) -> shadow::BoundLocals {
--- a/voxygen/src/render/renderer/drawer.rs
+++ b/voxygen/src/render/renderer/drawer.rs
@ -72,6 +72,7 @@ struct RendererBorrow<'frame> {

 pub struct Drawer<'frame> {
    encoder: Option<ManualOwningScope<'frame, wgpu::CommandEncoder>>,
+    pub(crate) pre_commands: Vec<wgpu::CommandBuffer>,
    borrow: RendererBorrow<'frame>,
    swap_tex: wgpu::SwapChainTexture,
    globals: &'frame GlobalsBindGroup,
@ -85,6 +86,7 @@ impl<'frame> Drawer<'frame> {
        encoder: wgpu::CommandEncoder,
        renderer: &'frame mut Renderer,
        swap_tex: wgpu::SwapChainTexture,
+        pre_commands: Vec<wgpu::CommandBuffer>,
        globals: &'frame GlobalsBindGroup,
    ) -> Self {
        renderer.ensure_sufficient_index_length();
@ -128,6 +130,7 @@ impl<'frame> Drawer<'frame> {

        Self {
            encoder: Some(encoder),
+            pre_commands,
            borrow,
            swap_tex,
            globals,
@ -640,7 +643,7 @@ impl<'frame> Drop for Drawer<'frame> {
        profiler.resolve_queries(&mut encoder);

        // It is recommended to only do one submit per frame
-        self.borrow.queue.submit(std::iter::once(encoder.finish()));
+        self.borrow.queue.submit(self.pre_commands.drain(..).chain(std::iter::once(encoder.finish())));
        // Need to call this after submit so the async mapping doesn't occur before
        // copying the screenshot to the buffer which will be mapped.
        if let Some(f) = download_and_handle_screenshot {
--- a/voxygen/src/render/texture.rs
+++ b/voxygen/src/render/texture.rs
@ -193,12 +193,7 @@ impl Texture {
    /// Replaces this texture with the contents of another texture.
    ///
    /// The source size should at least fit within this texture's size.
-    pub fn replace(&self, device: &wgpu::Device, queue: &wgpu::Queue, texture: &Self) {
-        let mut encoder = device
-            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
-                label: Some("Replace the texture buffer"),
-            });
-
+    pub fn replace<'a>(&self, device: &wgpu::Device, encoder: &mut wgpu::CommandEncoder, texture: &Self) {
        // Copy image
        encoder.copy_texture_to_texture(
            wgpu::ImageCopyTexture {
@ -217,9 +212,6 @@ impl Texture {
                depth_or_array_layers: 1,
            },
        );
-
-        // TODO: Delay submission, don't just submit immediately out of convenience!
-        queue.submit(std::iter::once(encoder.finish()));
    }

    /// Update a texture with the given data (used for updating the glyph cache
--- a/voxygen/src/run.rs
+++ b/voxygen/src/run.rs
@ -206,10 +206,11 @@ fn handle_main_events_cleared(
        span!(guard, "Render");

        // Render the screen using the global renderer
+        let is_egui_enabled = last.egui_enabled();
        if let Some(mut drawer) = global_state
            .window
            .renderer_mut()
-            .start_recording_frame(last.globals_bind_group())
+            .start_recording_frame(last.pre_commands(), last.globals_bind_group())
            .expect("Unrecoverable render error when starting a new frame!")
        {
            if global_state.clear_shadows_next_frame {
@ -219,7 +220,7 @@ fn handle_main_events_cleared(
            last.render(&mut drawer, &global_state.settings);

            #[cfg(feature = "egui-ui")]
-            if last.egui_enabled() && global_state.settings.interface.egui_enabled() {
+            if is_egui_enabled && global_state.settings.interface.egui_enabled() {
                drawer.draw_egui(&mut global_state.egui_state.platform, scale_factor);
            }
        };
--- a/voxygen/src/scene/figure/cache.rs
+++ b/voxygen/src/scene/figure/cache.rs
@ -464,7 +464,7 @@ where
                    ];

                    slot_.store(Some(MeshWorkerResponse {
-                        col_light: greedy.finalize(),
+                        col_light: greedy.finalize(Vec2::broadcast(1)),
                        opaque,
                        bounds: figure_bounds,
                        vertex_range: models,
--- a/voxygen/src/scene/mod.rs
+++ b/voxygen/src/scene/mod.rs
@ -1137,6 +1137,11 @@ impl Scene {
        self.music_mgr.maintain(audio, scene_data.state, client);
    }

+    pub fn pre_commands(&mut self) -> Vec<wgpu::CommandBuffer> {
+        // We may have some initial work to do before we get started (usually uploading textures).
+        core::mem::replace(&mut self.terrain.command_buffers, vec![])
+    }
+
    pub fn global_bind_group(&self) -> &GlobalsBindGroup { &self.globals_bind_group }

    /// Render the scene using the provided `Drawer`.
--- a/voxygen/src/scene/simple.rs
+++ b/voxygen/src/scene/simple.rs
@ -147,7 +147,7 @@ impl Scene {
                let range = 0..opaque_mesh.vertices().len() as u32;
                let model =
                    col_lights
-                        .create_figure(renderer, greedy.finalize(), (opaque_mesh, bounds), [range]);
+                        .create_figure(renderer, greedy.finalize(Vec2::broadcast(1)), (opaque_mesh, bounds), [range]);
                let mut buf = [Default::default(); anim::MAX_BONE_COUNT];
                let common_params = FigureUpdateCommonParameters {
                    entity: None,
--- a/voxygen/src/scene/terrain.rs
+++ b/voxygen/src/scene/terrain.rs
@ -10,7 +10,7 @@ use crate::{
    },
    render::{
        pipelines::{self, ColLights},
-        ColLightInfo, FirstPassDrawer, FluidVertex, GlobalModel, Instances, LodData, Mesh, Model,
+        ColLightInfo, Consts, FirstPassDrawer, FluidVertex, GlobalModel, Instances, LodData, Mesh, Model,
        RenderError, Renderer, SpriteGlobalsBindGroup, SpriteInstance, SpriteVertex, SpriteVerts,
        TerrainLocals, TerrainShadowDrawer, TerrainVertex, Texture, SPRITE_VERT_PAGE_SIZE,
    },
@ -31,7 +31,7 @@ use common::{
    volumes::vol_grid_2d::{VolGrid2d, VolGrid2dError},
 };
 use common_base::{prof_span, span};
-use core::{f32, fmt::Debug, marker::PhantomData, time::Duration};
+use core::{f32, fmt::Debug, marker::PhantomData, num::NonZeroU32, time::Duration};
 use crossbeam_channel as channel;
 use enum_iterator::IntoEnumIterator;
 use guillotiere::AtlasAllocator;
@ -133,8 +133,8 @@ pub struct MeshWorkerResponseMesh {
    opaque_model: Option<Model<TerrainVertex>>,
    fluid_model: Option<Model<FluidVertex>>,
    /// NOTE: These are memory mapped, and must be unmapped!
-    locals: pipelines::terrain::BoundLocals,
-    col_lights_info: ColLightInfo,
+    /* locals: pipelines::terrain::BoundLocals, */
+    col_lights_info: /*ColLightInfo*/(Option<Model<[u8; 4]>>, Vec2<u16>),
    light_map: LightMapFn,
    glow_map: LightMapFn,
 }
@ -260,7 +260,8 @@ fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug +
    create_opaque: impl for<'a> Fn(&'a Mesh<TerrainVertex>) -> Option<Model<TerrainVertex>>,
    create_fluid: impl for<'a> Fn(&'a Mesh<FluidVertex>) -> Option<Model<FluidVertex>>,
    create_instances: impl for<'a> Fn(&'a [SpriteInstance]) -> Instances<SpriteInstance>,
-    create_locals: impl Fn() -> pipelines::terrain::BoundLocals,
+    /* create_locals: impl Fn() -> pipelines::terrain::BoundLocals, */
+    create_texture: impl for<'a> Fn(/* wgpu::TextureDescriptor<'a>, wgpu::TextureViewDescriptor<'a>, wgpu::SamplerDescriptor<'a>*/&'a Mesh<[u8; 4]>) -> /*Texture + Send + Sync*/Option<Model<[u8; 4]>>,
 ) -> MeshWorkerResponse {
    span!(_guard, "mesh_worker");
    let (blocks_of_interest, sprite_kinds) = BlocksOfInterest::from_chunk(&chunk)/*default()*/;
@ -286,6 +287,9 @@ fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug +
                    &blocks_of_interest,
                ),
            );
+        let mut tex_ = Mesh::new();
+        *tex_.vertices_mut_vec() = col_lights_info.0;
+        let tex = create_texture(&tex_);
        mesh = Some(MeshWorkerResponseMesh {
            // TODO: Take sprite bounds into account somehow?
            z_bounds: (bounds.min.z, bounds.max.z),
@ -296,8 +300,8 @@ fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug +
            shadow_z_bounds: ((chunk.get_min_z() as f32).max(bounds.min.z), (chunk.get_max_z() as f32).min(bounds.max.z)),
            opaque_model: create_opaque(&opaque_mesh),
            fluid_model: create_fluid(&fluid_mesh),
-            locals: create_locals(),
-            col_lights_info,
+            /* locals: create_locals(), */
+            col_lights_info: (tex, col_lights_info.1),
            light_map,
            glow_map,
        });
@ -454,6 +458,8 @@ pub struct Terrain<V: RectRasterableVol = TerrainChunk> {
    /// for any particular chunk; look at the `texture` field in
    /// `TerrainChunkData` for that.
    col_lights: Arc<ColLights<pipelines::terrain::Locals>>,
+    /// Finished command buffers holding terrain texture updates; they take effect once submitted.
+    pub(crate) command_buffers: Vec<wgpu::CommandBuffer>,

    phantom: PhantomData<V>,
 }
@ -595,7 +601,7 @@ impl SpriteRenderContext {

            let sprite_col_lights = {
                prof_span!("finalize");
-                greedy.finalize()
+                greedy.finalize(Vec2::broadcast(1))
            };

            SpriteWorkerResponse {
@ -664,7 +670,8 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
            // TODO: Verify some good empirical constants.
            small_size_threshold: 128,
            large_size_threshold: 1024,
-            ..guillotiere::AllocatorOptions::default()
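+            // NOTE: wgpu requires bytes_per_row of buffer-to-texture copies to be a multiple of COPY_BYTES_PER_ROW_ALIGNMENT; atlas texels are 4 bytes each (Rgba8Unorm).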
+            alignment: guillotiere::Size::new((wgpu::COPY_BYTES_PER_ROW_ALIGNMENT / 4) as i32, 1),
        });

        // Number of background atlases to have prepared at a time.  It is unlikely we would ever
@ -711,6 +718,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
                &sprite_render_context.sprite_verts_buffer,
            ),
            col_lights: Arc::new(col_lights),
+            command_buffers: vec![],
            phantom: PhantomData,
        }
    }
@ -719,13 +727,15 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
    /// read one when count is 0, and we can create extra atlases as count moves higher).
    ///
    /// `old_texture` is an optional argument representing an old texture with the same size and
-    /// (ideally) format as the new \atlas.
+    /// (ideally) format as the new atlas.  It also includes an encoder, since when we need a new
+    /// atlas texture after the initial one we are already in the process of encoding more
+    /// commands.
    fn make_atlas(
        slowjob: &SlowJobPool,
        renderer: &mut Renderer,
        new_atlas_tx: &mut channel::Sender<Texture>,
        new_atlas_rx: &mut channel::Receiver<Texture>,
-        old_texture: Option<&Texture>,
+        old_texture: Option<(&Texture, &mut wgpu::CommandEncoder)>,
        count: usize,
    ) -> Result<ColLights<pipelines::terrain::Locals>, channel::RecvError> {
        span!(_guard, "make_atlas", "Terrain::make_atlas");
@ -733,49 +743,49 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
        let atlas_size = guillotiere::Size::new(max_texture_size as i32, max_texture_size as i32);
        (0..=count).for_each(|_| {
            let new_atlas_tx = new_atlas_tx.clone();
-            let texture_fn = renderer.create_texture_raw(
-                wgpu::TextureDescriptor {
-                    label: Some("Atlas texture"),
-                    size: wgpu::Extent3d {
-                        width: max_texture_size,
-                        height: max_texture_size,
-                        depth_or_array_layers: 1,
-                    },
-                    mip_level_count: 1,
-                    sample_count: 1,
-                    dimension: wgpu::TextureDimension::D2,
-                    format: wgpu::TextureFormat::Rgba8Unorm,
-                    // NOTE: COPY_SRC is used for the hack we use to work around zeroing, it
-                    // shouldn't be needed otherwise.
-                    usage: wgpu::TextureUsage::COPY_SRC | wgpu::TextureUsage::COPY_DST | wgpu::TextureUsage::SAMPLED,
-                },
-                wgpu::TextureViewDescriptor {
-                    label: Some("Atlas texture view"),
-                    format: Some(wgpu::TextureFormat::Rgba8Unorm),
-                    dimension: Some(wgpu::TextureViewDimension::D2),
-                    aspect: wgpu::TextureAspect::All,
-                    base_mip_level: 0,
-                    mip_level_count: None,
-                    base_array_layer: 0,
-                    array_layer_count: None,
-                },
-                wgpu::SamplerDescriptor {
-                    label: Some("Atlas sampler"),
-                    address_mode_u: wgpu::AddressMode::ClampToEdge,
-                    address_mode_v: wgpu::AddressMode::ClampToEdge,
-                    address_mode_w: wgpu::AddressMode::ClampToEdge,
-                    mag_filter: wgpu::FilterMode::Linear,
-                    min_filter: wgpu::FilterMode::Linear,
-                    mipmap_filter: wgpu::FilterMode::Nearest,
-                    ..Default::default()
-                },
-            );
+            let texture_fn = renderer.create_texture_raw();
            slowjob.spawn(&"IMAGE_PROCESSING", move || {
-                    // Construct the next atlas on a separate thread.  If it doesn't get sent, it means
-                    // the original channel was dropped, which implies the terrain scene data no longer
-                    // exists, so we can just drop the result in that case.
-                    let _ = new_atlas_tx.send(texture_fn());
-                });
+                // Construct the next atlas on a separate thread.  If it doesn't get sent, it means
+                // the original channel was dropped, which implies the terrain scene data no longer
+                // exists, so we can just drop the result in that case.
+                let _ = new_atlas_tx.send(texture_fn(
+                    wgpu::TextureDescriptor {
+                        label: Some("Atlas texture"),
+                        size: wgpu::Extent3d {
+                            width: max_texture_size,
+                            height: max_texture_size,
+                            depth_or_array_layers: 1,
+                        },
+                        mip_level_count: 1,
+                        sample_count: 1,
+                        dimension: wgpu::TextureDimension::D2,
+                        format: wgpu::TextureFormat::Rgba8Unorm,
+                        // NOTE: COPY_SRC is used for the hack we use to work around zeroing, it
+                        // shouldn't be needed otherwise.
+                        usage: wgpu::TextureUsage::COPY_SRC | wgpu::TextureUsage::COPY_DST | wgpu::TextureUsage::SAMPLED,
+                    },
+                    wgpu::TextureViewDescriptor {
+                        label: Some("Atlas texture view"),
+                        format: Some(wgpu::TextureFormat::Rgba8Unorm),
+                        dimension: Some(wgpu::TextureViewDimension::D2),
+                        aspect: wgpu::TextureAspect::All,
+                        base_mip_level: 0,
+                        mip_level_count: None,
+                        base_array_layer: 0,
+                        array_layer_count: None,
+                    },
+                    wgpu::SamplerDescriptor {
+                        label: Some("Atlas sampler"),
+                        address_mode_u: wgpu::AddressMode::ClampToEdge,
+                        address_mode_v: wgpu::AddressMode::ClampToEdge,
+                        address_mode_w: wgpu::AddressMode::ClampToEdge,
+                        mag_filter: wgpu::FilterMode::Linear,
+                        min_filter: wgpu::FilterMode::Linear,
+                        mipmap_filter: wgpu::FilterMode::Nearest,
+                        ..Default::default()
+                    },
+                ));
+            });
        });

        // Receive the most recent available atlas.  This call blocks only when there was no time
@ -787,9 +797,9 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
        // neither exists, and uploading a zero texture can be slow.  Fortunately, we almost always
        // have an existing texture to use in this case, so we can replace the explicit clear with
        // a copy from the previous atlas, skipping the CPU->GPU upload.
-        if let Some(old_texture) = old_texture {
+        if let Some((old_texture, encoder)) = old_texture {
            // TODO: Delay submission, don't just submit immediately out of convenience!
-            renderer.replace_texture(&texture, old_texture);
+            renderer.replace_texture(encoder, &texture, old_texture);
        } else {
            renderer.clear_texture(&texture);
        }
@ -808,9 +818,11 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
        self.z_index_down.remove(Vec3::from(zmax, pos.x, pos.y)); */
    }

-    fn insert_chunk(chunks: &mut HashMap<Vec2<i32>, TerrainChunkData>, atlas: &mut AtlasAllocator, pos: Vec2<i32>, chunk: TerrainChunkData) {
+    fn insert_chunk(slowjob: &SlowJobPool, chunks: &mut HashMap<Vec2<i32>, TerrainChunkData>, atlas: &mut AtlasAllocator, pos: Vec2<i32>, chunk: TerrainChunkData) {
        if let Some(old) = chunks.insert(pos, chunk) {
            Self::remove_chunk_meta(atlas, pos, &old);
+            // Drop the chunk on another thread.
+            slowjob.spawn(&"TERRAIN_DROP", move || { drop(old); });
        }
        /* let (zmin, zmax) = chunk.z_bounds;
        self.z_index_up.insert(Vec3::from(zmin, pos.x, pos.y));
@ -1154,16 +1166,32 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {

        span!(guard, "Queue meshing from todo list");
        let mesh_focus_pos = focus_pos.map(|e| e.trunc()).xy().as_::<i64>();
+        let mut min_active_dist = i64::MAX;
        let mut todo = self
            .mesh_todo
            .values_mut()
-            .filter(|todo| todo.status != ChunkWorkerStatus::Active)
+            .map(|todo| {
+                (
+                    (todo.pos.as_::<i64>() * TerrainChunk::RECT_SIZE.as_::<i64>())
+                    .distance_squared(mesh_focus_pos),
+                    todo
+                )
+            })
+            .filter(|(dist, todo)| {
+                if todo.status == ChunkWorkerStatus::Active {
+                    min_active_dist = min_active_dist.min(*dist);
+                    false
+                } else {
+                    true
+                }
+            })
            // TODO: BinaryHeap
            .collect::<Vec<_>>();
-        todo.sort_unstable_by_key(|todo| {
+        todo.sort_unstable_by_key(|(dist, todo)| {
            (
-                (todo.pos.as_::<i64>() * TerrainChunk::RECT_SIZE.as_::<i64>())
-                    .distance_squared(mesh_focus_pos),
+                // Sort stale or to-be-stale chunks from farthest to nearest, since each of them is
+                // pushed onto the front of the job queue (so the nearest one ends up first).
+                if *dist < min_active_dist || todo.status == ChunkWorkerStatus::Stale { -*dist } else { *dist },
                todo.started_tick.load(Ordering::Relaxed),
            )
        });
@ -1173,7 +1201,12 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
            /* .filter(|todo| todo.status != ChunkWorkerStatus::Active) */
            /* .min_by_key(|todo| ((todo.pos.as_::<i64>() * TerrainChunk::RECT_SIZE.as_::<i64>()).distance_squared(mesh_focus_pos), todo.started_tick)) */
            // Find a reference to the actual `TerrainChunk` we're meshing
-            ./*and_then*/filter_map(|todo| {
+            ./*and_then*/filter_map(|(dist, mut todo)| {
+                if dist < min_active_dist {
+                    // Heuristic: if this chunk is closer to the focus position than *any*
+                    // currently active chunk, mark it stale so that it gets inserted at the front.
+                    todo.status = ChunkWorkerStatus::Stale;
+                }
                let pos = todo.pos;
                Some((todo, terrain
                    .get_key_arc(pos)
@ -1249,11 +1282,12 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
            let sprite_data = Arc::clone(&self.sprite_data);
            let sprite_config = Arc::clone(&self.sprite_config);
            let cnt = Arc::clone(&self.mesh_todos_active);
-            let create_opaque = renderer.create_model_lazy();
-            let create_fluid = renderer.create_model_lazy();
+            let create_opaque = renderer.create_model_lazy(wgpu::BufferUsage::VERTEX);
+            let create_fluid = renderer.create_model_lazy(wgpu::BufferUsage::VERTEX);
            let create_instances = renderer.create_instances_lazy();
-            let create_locals = renderer.create_terrain_bound_locals();
-            cnt.fetch_add(1, Ordering::Relaxed);
+            /* let create_locals = renderer.create_terrain_bound_locals(); */
+            let create_texture = renderer./*create_texture_raw*/create_model_lazy(wgpu::BufferUsage::COPY_SRC);
+            /* cnt.fetch_add(1, Ordering::Relaxed); */
            let job = move || {
                // Since this loads when the task actually *runs*, rather than when it's
                // queued, it provides us with a good opportunity to check whether the chunk
@ -1277,14 +1311,16 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
                        create_opaque,
                        create_fluid,
                        create_instances,
-                        create_locals,
+                        /* create_locals, */
+                        create_texture,
                    ));
+                    cnt.fetch_add(1, Ordering::Relaxed);
                }
-                cnt.fetch_sub(1, Ordering::Relaxed);
+                /* cnt.fetch_sub(1, Ordering::Relaxed); */
            };
            if todo.status == ChunkWorkerStatus::Stale {
-                // The chunk was updated unexpectedly, so insert at the front, not the back, to see
-                // the update as soon as possible.
+                // The chunk was updated out of order, so insert at the front, not
+                // the back, to see the update as soon as possible.
                slowjob.spawn_front(&"TERRAIN_MESHING", job);
            } else {
                slowjob.spawn(&"TERRAIN_MESHING", job);
@ -1303,10 +1339,23 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
            scene_data.state.get_delta_time() * CHUNKS_PER_SECOND + self.mesh_recv_overflow;
        self.mesh_recv_overflow = recv_count.fract();
        let mesh_recv = &self.mesh_recv;
+        let max_recv_count = self.mesh_todos_active.load(Ordering::Relaxed).min(recv_count.floor() as u64);
        let incoming_chunks =
            std::iter::from_fn(|| mesh_recv.try_recv().ok())
-                .take(recv_count.floor() as usize);
-        for response in incoming_chunks {
+                .take(/* recv_count.floor() as usize */max_recv_count as usize);
+        self.mesh_todos_active.fetch_sub(max_recv_count, Ordering::Relaxed);
+        if max_recv_count > 0 {
+        // Construct a buffer for all the chunks we're going to process in this frame.  There might
+        // be some unused slots, which is fine.
+        let locals = /*Arc::new(*/renderer.create_consts_mapped(max_recv_count as usize)/*)*/;
+        let mut locals_buffer = renderer.get_consts_mapped(&locals);
+        let mut encoder = renderer.device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+                label: Some("Update textures."),
+            });
+
+        let locals_buffer_ = bytemuck::cast_slice_mut(&mut *locals_buffer);
+        for (locals_offset, (response, locals_buffer)) in incoming_chunks.zip(locals_buffer_).enumerate() {
            match self.mesh_todo.get(&response.pos) {
                // It's the mesh we want, insert the newly finished model into the terrain model
                // data structure (convert the mesh to a model first of course).
@ -1330,6 +1379,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
                            .unwrap_or(current_time as f32);
                        // TODO: Allocate new atlas on allocation failure.
                        let (tex, tex_size) = mesh.col_lights_info;
+                        let tex = tex.expect("The mesh exists, so the texture should too.");
                        let atlas = &mut self.atlas;
                        let chunks = &mut self.chunks;
                        let col_lights = &mut self.col_lights;
@ -1346,7 +1396,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
                                    renderer,
                                    new_atlas_tx,
                                    new_atlas_rx,
-                                    Some(&col_lights.texture),
+                                    Some((&col_lights.texture, &mut encoder)),
                                    0
                                )
                                .expect("Failed to create atlas texture");
@ -1377,15 +1427,43 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
                            allocation.rectangle.min.x as u32,
                            allocation.rectangle.min.y as u32,
                        );
-                        renderer.update_texture(
+                        /* renderer.update_texture(
                            &col_lights.texture,
                            atlas_offs.into_array(),
                            tex_size.map(u32::from).into_array(),
                            &tex,
+                        ); */
+                        // Copy image
+                        let tex_size = allocation.rectangle.size().to_array();
+                        let bytes_per_pixel = wgpu::TextureFormat::Rgba8Unorm.describe().block_size as u32;
+                        encoder.copy_buffer_to_texture(
+                            wgpu::ImageCopyBuffer {
+                                buffer: tex.buf(),
+                                layout: wgpu::ImageDataLayout {
+                                    offset: 0,
+                                    bytes_per_row: NonZeroU32::new(tex_size[0] as u32 * bytes_per_pixel),
+                                    rows_per_image: NonZeroU32::new(tex_size[1] as u32),
+                                },
+                            },
+                            wgpu::ImageCopyTexture {
+                                texture: &col_lights.texture.tex,
+                                mip_level: 0,
+                                origin: wgpu::Origin3d {
+                                    x: atlas_offs.x,
+                                    y: atlas_offs.y,
+                                    z: 0,
+                                },
+                            },
+                            wgpu::Extent3d {
+                                width: tex_size[0] as u32,
+                                height: tex_size[1] as u32,
+                                depth_or_array_layers: 1,
+                            },
                        );

                        // Update the memory mapped locals.
-                        renderer.update_mapped(&mut mesh.locals, &[TerrainLocals::new(
+                        *locals_buffer =
+                        /* renderer.update_mapped(&mut mesh.locals, &[*/TerrainLocals::new(
                            Vec3::from(
                                response.pos.map2(VolGrid2d::<V>::chunk_size(), |e, sz| {
                                    e as f32 * sz as f32
@ -1393,9 +1471,10 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
                            ),
                            atlas_offs,
                            load_time,
-                        )]);
+                        )/*])*/;

-                        Self::insert_chunk(&mut self.chunks, &mut self.atlas, response.pos, TerrainChunkData {
+                        /* let locals = Arc::clone(&locals); */
+                        Self::insert_chunk(&slowjob, &mut self.chunks, &mut self.atlas, response.pos, TerrainChunkData {
                            load_time,
                            opaque_model: mesh.opaque_model,
                            fluid_model: mesh.fluid_model,
@ -1404,7 +1483,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
                            light_map: mesh.light_map,
                            glow_map: mesh.glow_map,
                            sprite_instances,
-                            locals: mesh.locals,
+                            locals: /* mesh.locals */renderer.create_terrain_bound_locals(&locals, locals_offset),
                            visible: Visibility {
                                in_range: false,
                                in_frustum: false,
@ -1432,7 +1511,13 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
                None => {},
            }
        }
-        drop(slowjob);
+        // Drop the memory mapping and unmap the locals.
+        drop(locals_buffer);
+        renderer.unmap_consts(&locals);
+        /* // TODO: Delay submission, don't just submit immediately out of convenience!
+        renderer.queue.submit(std::iter::once(encoder.finish())); */
+        self.command_buffers.push(encoder.finish());
+        }
        drop(guard);

        // Construct view frustum
@ -1588,18 +1673,25 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
            // shadows at the same time.
            let chunks = &self.chunks;
            self.shadow_chunks
-                .retain(|(pos, chunk)| !chunks.contains_key(pos) && can_shadow_sun(*pos, chunk));
+                .drain_filter(|(pos, chunk)| chunks.contains_key(pos) || !can_shadow_sun(*pos, chunk))
+                .for_each(|(pos, chunk)| {
+                    // Drop the chunk on another thread.
+                    slowjob.spawn(&"TERRAIN_DROP", move || { drop(chunk); });
+                });

            (visible_light_volume, visible_bounds)
        } else {
            // There's no daylight or no shadows, so there's no reason to keep any
            // shadow chunks around.
-            self.shadow_chunks.clear();
+            let chunks = core::mem::replace(&mut self.shadow_chunks, Vec::new());
+            // Drop the chunks on another thread.
+            slowjob.spawn(&"TERRAIN_DROP", move || { drop(chunks); });
            (Vec::new(), math::Aabr {
                min: math::Vec2::zero(),
                max: math::Vec2::zero(),
            })
        };
+        drop(slowjob);
        drop(guard);
        span!(guard, "Rain occlusion magic");
        // Check if there is rain near the camera
--- a/voxygen/src/session/mod.rs
+++ b/voxygen/src/session/mod.rs
@ -1650,6 +1650,10 @@ impl PlayState for SessionState {

    fn globals_bind_group(&self) -> &GlobalsBindGroup { self.scene.global_bind_group() }

+    fn pre_commands(&mut self) -> Vec<wgpu::CommandBuffer> {
+        self.scene.pre_commands() // Delegates to the scene. NOTE(review): presumably hands over the terrain's queued command buffers — confirm.
+    }
+
    /// Render the session to the screen.
    ///
    /// This method should be called once per frame.