Remove more main thread bottlenecks.

This commit is contained in:
Joshua Yanovski 2022-08-15 19:01:43 -07:00
parent 39db97ed03
commit 54847c726b
25 changed files with 360 additions and 152 deletions

View File

@ -398,6 +398,8 @@ impl Client {
let mut state = State::client();
// Client-only components
state.ecs_mut().register::<comp::Last<CharacterState>>();
state.ecs_mut().write_resource::<SlowJobPool>()
.configure(&"TERRAIN_DROP", |_n| 1);
/* state.ecs_mut().write_resource::<SlowJobPool>()
.configure("TERRAIN_DESERIALIZING", |n| n / 2); */
let entity = state.ecs_mut().apply_entity_package(entity_package);
@ -1835,9 +1837,14 @@ impl Client {
chunks_to_remove.push(key);
}
});
// TODO: Parallelize?
let slowjob = self.state.slow_job_pool();
for key in chunks_to_remove {
self.state.remove_chunk(key);
let chunk = self.state.remove_chunk(key);
// Drop chunk in a background thread.
slowjob.spawn(&"TERRAIN_DROP", move || { drop(chunk); });
}
drop(slowjob);
let mut current_tick_send_chunk_requests = 0;
// Request chunks from the server.

View File

@ -441,18 +441,18 @@ impl State {
/// Remove the chunk with the given key from this state's terrain, if it
/// exists.
pub fn remove_chunk(&mut self, key: Vec2<i32>) {
if self
pub fn remove_chunk(&self, key: Vec2<i32>) -> Option<Arc<TerrainChunk>> {
self
.ecs
.write_resource::<TerrainGrid>()
.remove(key)
.is_some()
{
self.ecs
.write_resource::<TerrainChanges>()
.removed_chunks
.insert(key);
}
.map(|chunk| {
self.ecs
.write_resource::<TerrainChanges>()
.removed_chunks
.insert(key);
chunk
})
}
// Run RegionMap tick to update entity region occupancy

View File

@ -191,7 +191,7 @@ impl BattleModeBuffer {
}
pub struct ChunkRequest {
entity: EcsEntity,
entity: Option<EcsEntity>,
key: Vec2<i32>,
}

View File

@ -21,7 +21,13 @@ pub fn add_server_systems(dispatch_builder: &mut DispatcherBuilder) {
dispatch::<in_game::Sys>(dispatch_builder, &[]);
dispatch::<ping::Sys>(dispatch_builder, &[&general::Sys::sys_name()]);
dispatch::<register::Sys>(dispatch_builder, &[]);
dispatch::<terrain::Sys>(dispatch_builder, &[]);
// Unfortunately, this is currently desirable because otherwise we can miss chunk requests the
// first time around due to them not being within the view distance circle, requiring the
// client to time out before retrieving them again.
//
// This can also happen due to in-game commands like /site. Unfortunately this is a lot harder
// to fix, because the in-game commands are not even processed in a system.
dispatch::<terrain::Sys>(dispatch_builder, &[&in_game::Sys::sys_name()]);
dispatch::<pets::Sys>(dispatch_builder, &[]);
dispatch::<loot::Sys>(dispatch_builder, &[]);
}

View File

@ -92,7 +92,7 @@ impl<'a> System<'a> for Sys {
});
} else {
network_metrics.chunks_generation_triggered.inc();
chunk_requests.push(ChunkRequest { entity, key });
chunk_requests.push(ChunkRequest { entity: Some(entity), key });
}
} else {
network_metrics.chunks_request_dropped.inc();
@ -133,8 +133,8 @@ impl<'a> System<'a> for Sys {
// TODO: @zesterer do we want to be sending these chunk to the
// client even if they aren't
// requested? If we don't we could replace the
// entity here with Option<Entity> and pass in None.
chunk_requests.push(ChunkRequest { entity, key });
// entity here with None.
chunk_requests.push(ChunkRequest { entity: None, key });
}
}
}

View File

@ -9,6 +9,7 @@ use crate::{
chunk_generator::ChunkGenerator,
chunk_serialize::ChunkSendEntry,
client::Client,
// metrics::NetworkRequestMetrics,
presence::{Presence, RepositionOnChunkLoad},
rtsim::RtSim,
settings::Settings,
@ -24,7 +25,8 @@ use common::{
lottery::LootSpec,
resources::{Time, TimeOfDay},
slowjob::SlowJobPool,
terrain::TerrainGrid,
terrain::{/* TerrainChunkSize, */TerrainGrid},
vol::RectVolSize,
SkillSetBuilder,
};
@ -64,6 +66,7 @@ impl<'a> System<'a> for Sys {
ReadExpect<'a, IndexOwned>,
ReadExpect<'a, Arc<World>>,
ReadExpect<'a, EventBus<ChunkSendEntry>>,
// ReadExpect<'a, NetworkRequestMetrics>,
WriteExpect<'a, ChunkGenerator>,
WriteExpect<'a, TerrainGrid>,
Write<'a, TerrainChanges>,
@ -97,6 +100,7 @@ impl<'a> System<'a> for Sys {
index,
world,
chunk_send_bus,
// network_metrics,
mut chunk_generator,
mut terrain,
mut terrain_changes,
@ -114,6 +118,7 @@ impl<'a> System<'a> for Sys {
): Self::SystemData,
) {
let mut server_emitter = server_event_bus.emitter();
// let mut chunk_send_emitter = chunk_send_bus.emitter();
// Generate requested chunks
//
@ -121,8 +126,35 @@ impl<'a> System<'a> for Sys {
// don't create duplicate work for chunks that just finished but are not
// yet added to the terrain.
chunk_requests.drain(..).for_each(|request| {
/* if let Some(entity) = request.entity {
let in_vd = if let Some((pos, presence)) = positions.get(entity).zip(presences.get(entity)) {
pos.0.xy().map(|e| e as f64).distance_squared(
request.key.map(|e| e as f64 + 0.5)
* TerrainChunkSize::RECT_SIZE.map(|e| e as f64),
) < ((presence.view_distance as f64 - 1.0
+ 2.5 * 2.0_f64.sqrt())
* TerrainChunkSize::RECT_SIZE.x as f64)
.powi(2)
} else {
true
};
if in_vd {
if terrain.get_key_arc(request.key).is_some() {
network_metrics.chunks_served_from_memory.inc();
chunk_send_emitter.emit(ChunkSendEntry {
chunk_key: request.key,
entity,
});
return;
}
} else {
network_metrics.chunks_request_dropped.inc();
return;
}
}
network_metrics.chunks_request_dropped.inc(); */
chunk_generator.generate_chunk(
Some(request.entity),
request.entity,
request.key,
&slow_jobs,
Arc::clone(&world),

View File

@ -1242,7 +1242,7 @@ impl Hud {
) -> Vec<Event> {
span!(_guard, "update_layout", "Hud::update_layout");
let mut events = core::mem::take(&mut self.events);
if global_state.settings.interface.map_show_voxel_map {
if global_state.settings.interface.map_show_voxel_map && global_state.settings.interface.minimap_show {
self.voxel_minimap.maintain(client, &mut self.ui);
}
let (ref mut ui_widgets, ref mut item_tooltip_manager, ref mut tooltip_manager) =

View File

@ -155,6 +155,11 @@ pub trait PlayState {
/// Determines whether the play state should have an enforced FPS cap
fn capped_fps(&self) -> bool;
/// Returns any command buffers this play state has already prepared and that need to be added
/// directly to the queue. By default there are none.
fn pre_commands(&mut self) -> Vec<wgpu::CommandBuffer> { Vec::new() }
fn globals_bind_group(&self) -> &GlobalsBindGroup;
/// Draw the play state.

View File

@ -421,9 +421,12 @@ impl<'a, Allocator: AtlasAllocator> GreedyMesh<'a, Allocator> {
/// potentially use a single staged upload to the GPU.
///
/// Returns the ColLightsInfo corresponding to the constructed atlas.
pub fn finalize(self) -> ColLightInfo {
pub fn finalize(self, alignment: Vec2<u16>) -> ColLightInfo {
span!(_guard, "finalize", "GreedyMesh::finalize");
let cur_size = self.col_lights_size;
let mut cur_size = self.col_lights_size;
// Round up to the next multiple of `alignment` (which must be nonzero).
cur_size.x = (cur_size.x + alignment.x - 1) / alignment.x * alignment.x;
cur_size.y = (cur_size.y + alignment.y - 1) / alignment.y * alignment.y;
let col_lights = vec![
TerrainVertex::make_col_light(254, 0, Rgb::broadcast(254), true);
cur_size.x as usize * cur_size.y as usize

View File

@ -996,7 +996,10 @@ pub fn generate_mesh<'a/*, V: RectRasterableVol<Vox = Block> + ReadVol + Debug +
min: min_bounds,
max: max_bounds + min_bounds,
};
let (col_lights, col_lights_size) = greedy.finalize();
// WGPU requires this alignment.
let (col_lights, col_lights_size) = greedy.finalize(
Vec2::new((wgpu::COPY_BYTES_PER_ROW_ALIGNMENT / 4) as u16, 1),
);
(
opaque_mesh,

View File

@ -76,8 +76,25 @@ impl<T: Copy + Pod> DynamicBuffer<T> {
}
}
/// Update the GPU-side value represented by this constant handle, if it was previously memory
/// mapped, and then unmaps it.
/// Returns a mutable view of the mapped range of this buffer covering `len` elements, starting
/// at element `offset`.
///
/// Both `offset` and `len` are counted in elements of `T`; they are converted to byte positions
/// before the underlying buffer is sliced.
///
/// NOTE: Will panic if the buffer was not explicitly mapped before this (without being
/// unmapped), either directly or via [Buffer::new_mapped].
pub fn get_mapped_mut(&self, offset: usize, len: usize) -> wgpu::BufferViewMut<'_> {
    let elem_bytes = std::mem::size_of::<T>() as u64;
    let start = offset as u64 * elem_bytes;
    let end = start + len as u64 * elem_bytes;
    self.buf.slice(start..end).get_mapped_range_mut()
}
/// Unmaps the GPU-side handle represented by this buffer handle, if it was previously
/// memory-mapped.
///
/// NOTE: Will panic if the buffer was not explicitly mapped before this (without being
/// unmapped), either directly or via [Buffer::new_mapped].
@ -85,8 +102,8 @@ impl<T: Copy + Pod> DynamicBuffer<T> {
/// NOTE: Queue is not *explicitly* used here, but it is implicitly used during the unmap
/// (within wgpu internals) and requires acquiring a lock on it, so it's left in the API to
/// deter people from using it when the queue isn't available.
pub fn update_mapped(&mut self, _queue: &wgpu::Queue, vals: &[T], offset: usize) {
if !vals.is_empty() {
pub fn unmap(&self, _queue: &wgpu::Queue/* , vals: &[T], offset: usize */) {
/* if !vals.is_empty() {
let contents = bytemuck::cast_slice(vals);
let size_ty = std::mem::size_of::<T>() as u64;
@ -95,7 +112,7 @@ impl<T: Copy + Pod> DynamicBuffer<T> {
self.buf.slice(offset..offset + size)
.get_mapped_range_mut()
.copy_from_slice(contents);
}
} */
self.buf.unmap();
}
}

View File

@ -39,11 +39,19 @@ impl<T: Copy + Pod> Consts<T> {
self.buf.update(queue, vals, offset)
}
/// Update the GPU-side value represented by this constant handle, if it was previously memory
/// mapped, and then immediately unmaps it.
pub fn update_mapped(&mut self, queue: &wgpu::Queue, vals: &[T], offset: usize) {
self.buf.update_mapped(queue, vals, offset)
/// Returns the mapped view of `len` elements of this constant handle, starting at element
/// `offset`, if it was previously memory mapped.
///
/// This simply forwards to the underlying buffer's `get_mapped_mut`.
pub fn get_mapped_mut(&self, offset: usize, len: usize) -> wgpu::BufferViewMut<'_> {
    let mapped_view = self.buf.get_mapped_mut(offset, len);
    mapped_view
}
/// Unmaps the buffer behind this constant handle, if it was previously memory-mapped.
///
/// `queue` is simply forwarded to the underlying buffer's `unmap`.
pub fn unmap(&self, queue: &wgpu::Queue) { self.buf.unmap(queue) }
pub fn buf(&self) -> &wgpu::Buffer { &self.buf.buf }
pub fn len(&self) -> usize { self.buf.len() }
}

View File

@ -66,6 +66,11 @@ pub trait Vertex: Clone + bytemuck::Pod {
const QUADS_INDEX: Option<wgpu::IndexFormat>;
}
/// Lets raw four-byte elements be used wherever a [`Vertex`] type is required. The stride is the
/// size of one element, and no quad index format is provided.
impl Vertex for [u8; 4] {
    const STRIDE: wgpu::BufferAddress = core::mem::size_of::<Self>() as wgpu::BufferAddress;
    const QUADS_INDEX: Option<wgpu::IndexFormat> = None;
}
use serde::{Deserialize, Serialize};
/// Anti-aliasing modes
#[derive(PartialEq, Clone, Copy, Debug, Serialize, Deserialize)]

View File

@ -30,13 +30,13 @@ pub struct Model<V: Vertex> {
impl<V: Vertex> Model<V> {
/// Returns None if the provided mesh is empty
pub fn new(device: &wgpu::Device, mesh: &Mesh<V>) -> Option<Self> {
pub fn new(device: &wgpu::Device, usage: wgpu::BufferUsage, mesh: &Mesh<V>) -> Option<Self> {
if mesh.vertices().is_empty() {
return None;
}
Some(Self {
vbuf: Buffer::new(device, wgpu::BufferUsage::VERTEX, mesh.vertices()),
vbuf: Buffer::new(device, /*wgpu::BufferUsage::VERTEX*/usage, mesh.vertices()),
})
}
@ -50,7 +50,7 @@ impl<V: Vertex> Model<V> {
}
}
pub(super) fn buf(&self) -> &wgpu::Buffer { &self.vbuf.buf }
pub fn buf(&self) -> &wgpu::Buffer { &self.vbuf.buf }
#[allow(clippy::len_without_is_empty)]
pub fn len(&self) -> usize { self.vbuf.len() }

View File

@ -1,6 +1,6 @@
use super::super::{AaMode, Bound, Consts, GlobalsLayouts, Vertex as VertexTrait};
use bytemuck::{Pod, Zeroable};
use std::mem;
use std::{mem, sync::Arc};
use vek::*;
#[repr(C)]
@ -136,6 +136,11 @@ impl VertexTrait for Vertex {
const STRIDE: wgpu::BufferAddress = mem::size_of::<Self>() as wgpu::BufferAddress;
}
/// Number of `u64` padding words appended to [`Locals`] so that the whole struct is
/// `wgpu::BIND_BUFFER_ALIGNMENT` bytes long, which is needed to fulfill the wgpu spec.
///
/// The `4` subtracted here is the number of `u64`-sized words taken up by the non-padding fields
/// currently in `Locals`; it must be updated by hand if those fields change (we could replace
/// `Locals` with `LocalsInner` or something if we wanted to make this more robust).
const PADDING_LEN: usize = wgpu::BIND_BUFFER_ALIGNMENT as usize / mem::size_of::<u64>() - 4;
#[repr(C)]
#[derive(Copy, Clone, Debug, Zeroable, Pod)]
// TODO: new function and private fields??
@ -143,6 +148,7 @@ pub struct Locals {
model_offs: [f32; 3],
load_time: f32,
atlas_offs: [i32; 4],
padding: [u64; PADDING_LEN],
}
impl Locals {
@ -151,6 +157,7 @@ impl Locals {
model_offs: model_offs.into_array(),
load_time,
atlas_offs: Vec4::new(atlas_offs.x as i32, atlas_offs.y as i32, 0, 0).into_array(),
.. Self::default()
}
}
@ -159,11 +166,12 @@ impl Locals {
model_offs: [0.0; 3],
load_time: 0.0,
atlas_offs: [0; 4],
padding: [0; PADDING_LEN],
}
}
}
pub type BoundLocals = Bound<Consts<Locals>>;
pub type BoundLocals = Bound<()>;
pub struct TerrainLayout {
pub locals: wgpu::BindGroupLayout,
@ -191,19 +199,23 @@ impl TerrainLayout {
}
}
pub fn bind_locals(&self, device: &wgpu::Device, locals: Consts<Locals>) -> BoundLocals {
pub fn bind_locals(&self, device: &wgpu::Device, locals: &Consts<Locals>, offset: usize) -> BoundLocals {
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout: &self.locals,
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: locals.buf().as_entire_binding(),
resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
buffer: locals.buf(),
offset: (offset * mem::size_of::<Locals>()) as wgpu::BufferAddress,
size: wgpu::BufferSize::new(mem::size_of::<Locals>() as u64),
})
}],
});
BoundLocals {
bind_group,
with: locals,
with: /*locals*/(),
}
}
}

View File

@ -139,8 +139,8 @@ enum State {
/// GPU, along with pipeline state objects (PSOs) needed to render different
/// kinds of models to the screen.
pub struct Renderer {
device: Arc<wgpu::Device>,
queue: wgpu::Queue,
pub(crate) device: Arc<wgpu::Device>,
pub(crate) queue: wgpu::Queue,
surface: wgpu::Surface,
swap_chain: wgpu::SwapChain,
sc_desc: wgpu::SwapChainDescriptor,
@ -998,6 +998,7 @@ impl Renderer {
/// be returned
pub fn start_recording_frame<'a>(
&'a mut self,
pre_commands: Vec<wgpu::CommandBuffer>,
globals: &'a GlobalsBindGroup,
) -> Result<Option<drawer::Drawer<'a>>, RenderError> {
span!(
@ -1214,7 +1215,7 @@ impl Renderer {
label: Some("A render encoder"),
});
Ok(Some(drawer::Drawer::new(encoder, self, tex, globals)))
Ok(Some(drawer::Drawer::new(encoder, self, tex, pre_commands, globals)))
}
/// Recreate the pipelines
@ -1265,14 +1266,26 @@ impl Renderer {
Consts::new_with_data(device, vals)
}
/// Create a new set of constants with room for `len` values via [`Consts::new_mapped`].
///
/// NOTE(review): presumably the returned constants start out memory mapped and have to be
/// unmapped before the GPU uses them; confirm against `Consts::new_mapped`.
pub fn create_consts_mapped<T: Copy + bytemuck::Pod>(&mut self, len: usize) -> Consts<T> {
    let device = &self.device;
    Consts::new_mapped(device, len)
}
/// Update a set of constants with the provided values.
pub fn update_consts<T: Copy + bytemuck::Pod>(&self, consts: &mut Consts<T>, vals: &[T]) {
consts.update(&self.queue, vals, 0)
}
/// Update a set of memory mapped constants with the provided values.
pub fn update_mapped<T: Copy + bytemuck::Pod>(&self, consts: &mut Consts<T>, vals: &[T]) {
consts.update_mapped(&self.queue, vals, 0)
/// Gets the memory mapped view covering every element of a set of constants (from element 0 up
/// to `consts.len()`).
pub fn get_consts_mapped<'a, T: Copy + bytemuck::Pod>(
    &self,
    consts: &'a Consts<T>,
) -> wgpu::BufferViewMut<'a> {
    let element_count = consts.len();
    consts.get_mapped_mut(0, element_count)
}
/// Unmaps a set of memory mapped constants, passing this renderer's queue along to the
/// constants' own `unmap`.
pub fn unmap_consts<T: Copy + bytemuck::Pod>(&self, consts: &Consts<T>) {
    let queue = &self.queue;
    consts.unmap(queue)
}
pub fn update_clouds_locals(&mut self, new_val: clouds::Locals) {
@ -1369,17 +1382,17 @@ impl Renderer {
/// If the provided mesh is empty this returns None
pub fn create_model<V: Vertex>(&mut self, mesh: &Mesh<V>) -> Option<Model<V>> {
Self::update_index_length::<V>(&self.quad_index_buffer_u32_len, mesh.vertices().len());
Model::new(&self.device, mesh)
Model::new(&self.device, wgpu::BufferUsage::VERTEX, mesh)
}
/// Create a new model from the provided mesh, lazily (for use off the main thread).
/// If the provided mesh is empty this returns None
pub fn create_model_lazy<V: Vertex>(&mut self) -> impl for<'a> Fn(&'a Mesh<V>) -> Option<Model<V>> + Send + Sync {
pub fn create_model_lazy<V: Vertex>(&mut self, usage: wgpu::BufferUsage) -> impl for<'a> Fn(&'a Mesh<V>) -> Option<Model<V>> + Send + Sync {
let device = Arc::clone(&self.device);
let quad_index_buffer_u32_len = Arc::clone(&self.quad_index_buffer_u32_len);
move |mesh| {
Self::update_index_length::<V>(&quad_index_buffer_u32_len, mesh.vertices().len());
Model::new(&device, mesh)
Model::new(&device, usage, mesh)
}
}
@ -1444,14 +1457,11 @@ impl Renderer {
///
/// NOTE: This is done lazily--the returned function must be invoked to actually create the
/// texture. This allows creating the texture on another thread.
pub fn create_texture_raw<'a>(
pub fn create_texture_raw(
&mut self,
texture_info: wgpu::TextureDescriptor<'a>,
view_info: wgpu::TextureViewDescriptor<'a>,
sampler_info: wgpu::SamplerDescriptor<'a>,
) -> impl FnOnce() -> Texture + Send + Sync + 'a {
) -> impl for<'a> Fn(wgpu::TextureDescriptor<'a>, wgpu::TextureViewDescriptor<'a>, wgpu::SamplerDescriptor<'a>) -> Texture + Send + Sync {
let device = Arc::clone(&self.device);
move || {
move |texture_info, view_info, sampler_info| {
let texture = Texture::new_raw(&device, &texture_info, &view_info, &sampler_info);
texture
}
@ -1508,8 +1518,8 @@ impl Renderer {
/// Replaces the destination texture with the contents of the source texture.
///
/// The source size should at least fit within the destination texture's size.
pub fn replace_texture(&mut self, dest: &Texture, source: &Texture) {
dest.replace(&self.device, &self.queue, source);
pub fn replace_texture(&mut self, encoder: &mut wgpu::CommandEncoder, dest: &Texture, source: &Texture) {
dest.replace(&self.device, encoder, source);
}
/// Queue to obtain a screenshot on the next frame render

View File

@ -78,13 +78,16 @@ impl Renderer {
/// before use.
pub fn create_terrain_bound_locals(
&mut self,
) -> /*for<'a> Fn(&'a [terrain::Locals]) -> terrain::BoundLocals + Send + Sync*/impl Fn() -> terrain::BoundLocals + Send + Sync {
let device = Arc::clone(&self.device);
locals: /*Arc<*/&Consts<terrain::Locals>/*>*/,
offset: usize,
) -> /*for<'a> Fn(&'a [terrain::Locals]) -> terrain::BoundLocals + Send + Sync*//* impl Fn() -> terrain::BoundLocals + Send + Sync */terrain::BoundLocals {
/* let device = Arc::clone(&self.device);
let immutable = Arc::clone(&self.layouts.immutable);
move || {
let locals = Consts::new_mapped(&device, 1);
immutable.terrain.bind_locals(&device, locals)
}
} */
self.layouts.immutable.terrain.bind_locals(&self.device, locals, offset)
}
pub fn create_shadow_bound_locals(&mut self, locals: &[shadow::Locals]) -> shadow::BoundLocals {

View File

@ -72,6 +72,7 @@ struct RendererBorrow<'frame> {
pub struct Drawer<'frame> {
encoder: Option<ManualOwningScope<'frame, wgpu::CommandEncoder>>,
pub(crate) pre_commands: Vec<wgpu::CommandBuffer>,
borrow: RendererBorrow<'frame>,
swap_tex: wgpu::SwapChainTexture,
globals: &'frame GlobalsBindGroup,
@ -85,6 +86,7 @@ impl<'frame> Drawer<'frame> {
encoder: wgpu::CommandEncoder,
renderer: &'frame mut Renderer,
swap_tex: wgpu::SwapChainTexture,
pre_commands: Vec<wgpu::CommandBuffer>,
globals: &'frame GlobalsBindGroup,
) -> Self {
renderer.ensure_sufficient_index_length();
@ -128,6 +130,7 @@ impl<'frame> Drawer<'frame> {
Self {
encoder: Some(encoder),
pre_commands,
borrow,
swap_tex,
globals,
@ -640,7 +643,7 @@ impl<'frame> Drop for Drawer<'frame> {
profiler.resolve_queries(&mut encoder);
// It is recommended to only do one submit per frame
self.borrow.queue.submit(std::iter::once(encoder.finish()));
self.borrow.queue.submit(self.pre_commands.drain(..).chain(std::iter::once(encoder.finish())));
// Need to call this after submit so the async mapping doesn't occur before
// copying the screenshot to the buffer which will be mapped.
if let Some(f) = download_and_handle_screenshot {

View File

@ -193,12 +193,7 @@ impl Texture {
/// Replaces this texture with the contents of another texture.
///
/// The source size should at least fit within this texture's size.
pub fn replace(&self, device: &wgpu::Device, queue: &wgpu::Queue, texture: &Self) {
let mut encoder = device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Replace the texture buffer"),
});
pub fn replace<'a>(&self, device: &wgpu::Device, encoder: &mut wgpu::CommandEncoder, texture: &Self) {
// Copy image
encoder.copy_texture_to_texture(
wgpu::ImageCopyTexture {
@ -217,9 +212,6 @@ impl Texture {
depth_or_array_layers: 1,
},
);
// TODO: Delay submission, don't just submit immediately out of convenience!
queue.submit(std::iter::once(encoder.finish()));
}
/// Update a texture with the given data (used for updating the glyph cache

View File

@ -206,10 +206,11 @@ fn handle_main_events_cleared(
span!(guard, "Render");
// Render the screen using the global renderer
let is_egui_enabled = last.egui_enabled();
if let Some(mut drawer) = global_state
.window
.renderer_mut()
.start_recording_frame(last.globals_bind_group())
.start_recording_frame(last.pre_commands(), last.globals_bind_group())
.expect("Unrecoverable render error when starting a new frame!")
{
if global_state.clear_shadows_next_frame {
@ -219,7 +220,7 @@ fn handle_main_events_cleared(
last.render(&mut drawer, &global_state.settings);
#[cfg(feature = "egui-ui")]
if last.egui_enabled() && global_state.settings.interface.egui_enabled() {
if is_egui_enabled && global_state.settings.interface.egui_enabled() {
drawer.draw_egui(&mut global_state.egui_state.platform, scale_factor);
}
};

View File

@ -464,7 +464,7 @@ where
];
slot_.store(Some(MeshWorkerResponse {
col_light: greedy.finalize(),
col_light: greedy.finalize(Vec2::broadcast(1)),
opaque,
bounds: figure_bounds,
vertex_range: models,

View File

@ -1137,6 +1137,11 @@ impl Scene {
self.music_mgr.maintain(audio, scene_data.state, client);
}
/// Hands over the command buffers the terrain has accumulated, leaving the terrain's list empty.
///
/// We may have some initial work to do before we get started (usually uploading textures).
pub fn pre_commands(&mut self) -> Vec<wgpu::CommandBuffer> {
    // `take` swaps in an empty `Vec`, exactly like `replace(.., vec![])`, but states the intent.
    core::mem::take(&mut self.terrain.command_buffers)
}
pub fn global_bind_group(&self) -> &GlobalsBindGroup { &self.globals_bind_group }
/// Render the scene using the provided `Drawer`.

View File

@ -147,7 +147,7 @@ impl Scene {
let range = 0..opaque_mesh.vertices().len() as u32;
let model =
col_lights
.create_figure(renderer, greedy.finalize(), (opaque_mesh, bounds), [range]);
.create_figure(renderer, greedy.finalize(Vec2::broadcast(1)), (opaque_mesh, bounds), [range]);
let mut buf = [Default::default(); anim::MAX_BONE_COUNT];
let common_params = FigureUpdateCommonParameters {
entity: None,

View File

@ -10,7 +10,7 @@ use crate::{
},
render::{
pipelines::{self, ColLights},
ColLightInfo, FirstPassDrawer, FluidVertex, GlobalModel, Instances, LodData, Mesh, Model,
ColLightInfo, Consts, FirstPassDrawer, FluidVertex, GlobalModel, Instances, LodData, Mesh, Model,
RenderError, Renderer, SpriteGlobalsBindGroup, SpriteInstance, SpriteVertex, SpriteVerts,
TerrainLocals, TerrainShadowDrawer, TerrainVertex, Texture, SPRITE_VERT_PAGE_SIZE,
},
@ -31,7 +31,7 @@ use common::{
volumes::vol_grid_2d::{VolGrid2d, VolGrid2dError},
};
use common_base::{prof_span, span};
use core::{f32, fmt::Debug, marker::PhantomData, time::Duration};
use core::{f32, fmt::Debug, marker::PhantomData, num::NonZeroU32, time::Duration};
use crossbeam_channel as channel;
use enum_iterator::IntoEnumIterator;
use guillotiere::AtlasAllocator;
@ -133,8 +133,8 @@ pub struct MeshWorkerResponseMesh {
opaque_model: Option<Model<TerrainVertex>>,
fluid_model: Option<Model<FluidVertex>>,
/// NOTE: These are memory mapped, and must be unmapped!
locals: pipelines::terrain::BoundLocals,
col_lights_info: ColLightInfo,
/* locals: pipelines::terrain::BoundLocals, */
col_lights_info: /*ColLightInfo*/(Option<Model<[u8; 4]>>, Vec2<u16>),
light_map: LightMapFn,
glow_map: LightMapFn,
}
@ -260,7 +260,8 @@ fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug +
create_opaque: impl for<'a> Fn(&'a Mesh<TerrainVertex>) -> Option<Model<TerrainVertex>>,
create_fluid: impl for<'a> Fn(&'a Mesh<FluidVertex>) -> Option<Model<FluidVertex>>,
create_instances: impl for<'a> Fn(&'a [SpriteInstance]) -> Instances<SpriteInstance>,
create_locals: impl Fn() -> pipelines::terrain::BoundLocals,
/* create_locals: impl Fn() -> pipelines::terrain::BoundLocals, */
create_texture: impl for<'a> Fn(/* wgpu::TextureDescriptor<'a>, wgpu::TextureViewDescriptor<'a>, wgpu::SamplerDescriptor<'a>*/&'a Mesh<[u8; 4]>) -> /*Texture + Send + Sync*/Option<Model<[u8; 4]>>,
) -> MeshWorkerResponse {
span!(_guard, "mesh_worker");
let (blocks_of_interest, sprite_kinds) = BlocksOfInterest::from_chunk(&chunk)/*default()*/;
@ -286,6 +287,9 @@ fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug +
&blocks_of_interest,
),
);
let mut tex_ = Mesh::new();
*tex_.vertices_mut_vec() = col_lights_info.0;
let tex = create_texture(&tex_);
mesh = Some(MeshWorkerResponseMesh {
// TODO: Take sprite bounds into account somehow?
z_bounds: (bounds.min.z, bounds.max.z),
@ -296,8 +300,8 @@ fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug +
shadow_z_bounds: ((chunk.get_min_z() as f32).max(bounds.min.z), (chunk.get_max_z() as f32).min(bounds.max.z)),
opaque_model: create_opaque(&opaque_mesh),
fluid_model: create_fluid(&fluid_mesh),
locals: create_locals(),
col_lights_info,
/* locals: create_locals(), */
col_lights_info: (tex, col_lights_info.1),
light_map,
glow_map,
});
@ -454,6 +458,8 @@ pub struct Terrain<V: RectRasterableVol = TerrainChunk> {
/// for any particular chunk; look at the `texture` field in
/// `TerrainChunkData` for that.
col_lights: Arc<ColLights<pipelines::terrain::Locals>>,
/// Used to complete terrain texture updates.
pub(crate) command_buffers: Vec<wgpu::CommandBuffer>,
phantom: PhantomData<V>,
}
@ -595,7 +601,7 @@ impl SpriteRenderContext {
let sprite_col_lights = {
prof_span!("finalize");
greedy.finalize()
greedy.finalize(Vec2::broadcast(1))
};
SpriteWorkerResponse {
@ -664,7 +670,8 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
// TODO: Verify some good empirical constants.
small_size_threshold: 128,
large_size_threshold: 1024,
..guillotiere::AllocatorOptions::default()
// NOTE: Required by wgpu spec.
alignment: guillotiere::Size::new((wgpu::COPY_BYTES_PER_ROW_ALIGNMENT / 4) as i32, 1),
});
// Number of background atlases to have prepared at a time. It is unlikely we would ever
@ -711,6 +718,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
&sprite_render_context.sprite_verts_buffer,
),
col_lights: Arc::new(col_lights),
command_buffers: vec![],
phantom: PhantomData,
}
}
@ -719,13 +727,15 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
/// read one when count is 0, and we can create extra atlases as count moves higher).
///
/// `old_texture` is an optional argument representing an old texture with the same size and
/// (ideally) format as the new \atlas.
/// (ideally) format as the new atlas. It also includes an encoder, since when we need a new
/// atlas texture after the initial one we are already in the process of encoding more
/// commands.
fn make_atlas(
slowjob: &SlowJobPool,
renderer: &mut Renderer,
new_atlas_tx: &mut channel::Sender<Texture>,
new_atlas_rx: &mut channel::Receiver<Texture>,
old_texture: Option<&Texture>,
old_texture: Option<(&Texture, &mut wgpu::CommandEncoder)>,
count: usize,
) -> Result<ColLights<pipelines::terrain::Locals>, channel::RecvError> {
span!(_guard, "make_atlas", "Terrain::make_atlas");
@ -733,49 +743,49 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
let atlas_size = guillotiere::Size::new(max_texture_size as i32, max_texture_size as i32);
(0..=count).for_each(|_| {
let new_atlas_tx = new_atlas_tx.clone();
let texture_fn = renderer.create_texture_raw(
wgpu::TextureDescriptor {
label: Some("Atlas texture"),
size: wgpu::Extent3d {
width: max_texture_size,
height: max_texture_size,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8Unorm,
// NOTE: COPY_SRC is used for the hack we use to work around zeroing, it
// shouldn't be needed otherwise.
usage: wgpu::TextureUsage::COPY_SRC | wgpu::TextureUsage::COPY_DST | wgpu::TextureUsage::SAMPLED,
},
wgpu::TextureViewDescriptor {
label: Some("Atlas texture view"),
format: Some(wgpu::TextureFormat::Rgba8Unorm),
dimension: Some(wgpu::TextureViewDimension::D2),
aspect: wgpu::TextureAspect::All,
base_mip_level: 0,
mip_level_count: None,
base_array_layer: 0,
array_layer_count: None,
},
wgpu::SamplerDescriptor {
label: Some("Atlas sampler"),
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Linear,
mipmap_filter: wgpu::FilterMode::Nearest,
..Default::default()
},
);
let texture_fn = renderer.create_texture_raw();
slowjob.spawn(&"IMAGE_PROCESSING", move || {
// Construct the next atlas on a separate thread. If it doesn't get sent, it means
// the original channel was dropped, which implies the terrain scene data no longer
// exists, so we can just drop the result in that case.
let _ = new_atlas_tx.send(texture_fn());
});
// Construct the next atlas on a separate thread. If it doesn't get sent, it means
// the original channel was dropped, which implies the terrain scene data no longer
// exists, so we can just drop the result in that case.
let _ = new_atlas_tx.send(texture_fn(
wgpu::TextureDescriptor {
label: Some("Atlas texture"),
size: wgpu::Extent3d {
width: max_texture_size,
height: max_texture_size,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8Unorm,
// NOTE: COPY_SRC is used for the hack we use to work around zeroing, it
// shouldn't be needed otherwise.
usage: wgpu::TextureUsage::COPY_SRC | wgpu::TextureUsage::COPY_DST | wgpu::TextureUsage::SAMPLED,
},
wgpu::TextureViewDescriptor {
label: Some("Atlas texture view"),
format: Some(wgpu::TextureFormat::Rgba8Unorm),
dimension: Some(wgpu::TextureViewDimension::D2),
aspect: wgpu::TextureAspect::All,
base_mip_level: 0,
mip_level_count: None,
base_array_layer: 0,
array_layer_count: None,
},
wgpu::SamplerDescriptor {
label: Some("Atlas sampler"),
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Linear,
mipmap_filter: wgpu::FilterMode::Nearest,
..Default::default()
},
));
});
});
// Receive the most recent available atlas. This call blocks only when there was no time
@ -787,9 +797,9 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
// neither exists, and uploading a zero texture can be slow. Fortunately, we almost always
// have an existing texture to use in this case, so we can replace the explicit clear with
// a copy from the previous atlas, skipping the CPU->GPU upload.
if let Some(old_texture) = old_texture {
if let Some((old_texture, encoder)) = old_texture {
// TODO: Delay submission, don't just submit immediately out of convenience!
renderer.replace_texture(&texture, old_texture);
renderer.replace_texture(encoder, &texture, old_texture);
} else {
renderer.clear_texture(&texture);
}
@ -808,9 +818,11 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
self.z_index_down.remove(Vec3::from(zmax, pos.x, pos.y)); */
}
fn insert_chunk(chunks: &mut HashMap<Vec2<i32>, TerrainChunkData>, atlas: &mut AtlasAllocator, pos: Vec2<i32>, chunk: TerrainChunkData) {
fn insert_chunk(slowjob: &SlowJobPool, chunks: &mut HashMap<Vec2<i32>, TerrainChunkData>, atlas: &mut AtlasAllocator, pos: Vec2<i32>, chunk: TerrainChunkData) {
if let Some(old) = chunks.insert(pos, chunk) {
Self::remove_chunk_meta(atlas, pos, &old);
// Drop the chunk on another thread.
slowjob.spawn(&"TERRAIN_DROP", move || { drop(old); });
}
/* let (zmin, zmax) = chunk.z_bounds;
self.z_index_up.insert(Vec3::from(zmin, pos.x, pos.y));
@ -1154,16 +1166,32 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
span!(guard, "Queue meshing from todo list");
let mesh_focus_pos = focus_pos.map(|e| e.trunc()).xy().as_::<i64>();
let mut min_active_dist = i64::MAX;
let mut todo = self
.mesh_todo
.values_mut()
.filter(|todo| todo.status != ChunkWorkerStatus::Active)
.map(|todo| {
(
(todo.pos.as_::<i64>() * TerrainChunk::RECT_SIZE.as_::<i64>())
.distance_squared(mesh_focus_pos),
todo
)
})
.filter(|(dist, todo)| {
if todo.status == ChunkWorkerStatus::Active {
min_active_dist = min_active_dist.min(*dist);
false
} else {
true
}
})
// TODO: BinaryHeap
.collect::<Vec<_>>();
todo.sort_unstable_by_key(|todo| {
todo.sort_unstable_by_key(|(dist, todo)| {
(
(todo.pos.as_::<i64>() * TerrainChunk::RECT_SIZE.as_::<i64>())
.distance_squared(mesh_focus_pos),
// Sort from back to front for stale or to-be-stale objects, since they get pushed
// in reverse order.
if *dist < min_active_dist || todo.status == ChunkWorkerStatus::Stale { -*dist } else { *dist },
todo.started_tick.load(Ordering::Relaxed),
)
});
@ -1173,7 +1201,12 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
/* .filter(|todo| todo.status != ChunkWorkerStatus::Active) */
/* .min_by_key(|todo| ((todo.pos.as_::<i64>() * TerrainChunk::RECT_SIZE.as_::<i64>()).distance_squared(mesh_focus_pos), todo.started_tick)) */
// Find a reference to the actual `TerrainChunk` we're meshing
./*and_then*/filter_map(|todo| {
./*and_then*/filter_map(|(dist, mut todo)| {
if dist < min_active_dist {
// Heuristic: if this chunk is lower than *any* currently active chunk, insert
// it at the front.
todo.status = ChunkWorkerStatus::Stale;
}
let pos = todo.pos;
Some((todo, terrain
.get_key_arc(pos)
@ -1249,11 +1282,12 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
let sprite_data = Arc::clone(&self.sprite_data);
let sprite_config = Arc::clone(&self.sprite_config);
let cnt = Arc::clone(&self.mesh_todos_active);
let create_opaque = renderer.create_model_lazy();
let create_fluid = renderer.create_model_lazy();
let create_opaque = renderer.create_model_lazy(wgpu::BufferUsage::VERTEX);
let create_fluid = renderer.create_model_lazy(wgpu::BufferUsage::VERTEX);
let create_instances = renderer.create_instances_lazy();
let create_locals = renderer.create_terrain_bound_locals();
cnt.fetch_add(1, Ordering::Relaxed);
/* let create_locals = renderer.create_terrain_bound_locals(); */
let create_texture = renderer./*create_texture_raw*/create_model_lazy(wgpu::BufferUsage::COPY_SRC);
/* cnt.fetch_add(1, Ordering::Relaxed); */
let job = move || {
// Since this loads when the task actually *runs*, rather than when it's
// queued, it provides us with a good opportunity to check whether the chunk
@ -1277,14 +1311,16 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
create_opaque,
create_fluid,
create_instances,
create_locals,
/* create_locals, */
create_texture,
));
cnt.fetch_add(1, Ordering::Relaxed);
}
cnt.fetch_sub(1, Ordering::Relaxed);
/* cnt.fetch_sub(1, Ordering::Relaxed); */
};
if todo.status == ChunkWorkerStatus::Stale {
// The chunk was updated unexpectedly, so insert at the front, not the back, to see
// the update as soon as possible.
// The chunk was updated out of order, so insert at the front, not
// the back, to see the update as soon as possible.
slowjob.spawn_front(&"TERRAIN_MESHING", job);
} else {
slowjob.spawn(&"TERRAIN_MESHING", job);
@ -1303,10 +1339,23 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
scene_data.state.get_delta_time() * CHUNKS_PER_SECOND + self.mesh_recv_overflow;
self.mesh_recv_overflow = recv_count.fract();
let mesh_recv = &self.mesh_recv;
let max_recv_count = self.mesh_todos_active.load(Ordering::Relaxed).min(recv_count.floor() as u64);
let incoming_chunks =
std::iter::from_fn(|| mesh_recv.try_recv().ok())
.take(recv_count.floor() as usize);
for response in incoming_chunks {
.take(/* recv_count.floor() as usize */max_recv_count as usize);
self.mesh_todos_active.fetch_sub(max_recv_count, Ordering::Relaxed);
if max_recv_count > 0 {
// Construct a buffer for all the chunks we're going to process in this frame. There might
// be some unused slots, which is fine.
let locals = /*Arc::new(*/renderer.create_consts_mapped(max_recv_count as usize)/*)*/;
let mut locals_buffer = renderer.get_consts_mapped(&locals);
let mut encoder = renderer.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Update textures."),
});
let locals_buffer_ = bytemuck::cast_slice_mut(&mut *locals_buffer);
for (locals_offset, (response, locals_buffer)) in incoming_chunks.zip(locals_buffer_).enumerate() {
match self.mesh_todo.get(&response.pos) {
// It's the mesh we want, insert the newly finished model into the terrain model
// data structure (convert the mesh to a model first of course).
@ -1330,6 +1379,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
.unwrap_or(current_time as f32);
// TODO: Allocate new atlas on allocation failure.
let (tex, tex_size) = mesh.col_lights_info;
let tex = tex.expect("The mesh exists, so the texture should too.");
let atlas = &mut self.atlas;
let chunks = &mut self.chunks;
let col_lights = &mut self.col_lights;
@ -1346,7 +1396,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
renderer,
new_atlas_tx,
new_atlas_rx,
Some(&col_lights.texture),
Some((&col_lights.texture, &mut encoder)),
0
)
.expect("Failed to create atlas texture");
@ -1377,15 +1427,43 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
allocation.rectangle.min.x as u32,
allocation.rectangle.min.y as u32,
);
renderer.update_texture(
/* renderer.update_texture(
&col_lights.texture,
atlas_offs.into_array(),
tex_size.map(u32::from).into_array(),
&tex,
); */
// Copy image
let tex_size = allocation.rectangle.size().to_array();
let bytes_per_pixel = wgpu::TextureFormat::Rgba8Unorm.describe().block_size as u32;
encoder.copy_buffer_to_texture(
wgpu::ImageCopyBuffer {
buffer: tex.buf(),
layout: wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: NonZeroU32::new(tex_size[0] as u32 * bytes_per_pixel),
rows_per_image: NonZeroU32::new(tex_size[1] as u32),
},
},
wgpu::ImageCopyTexture {
texture: &col_lights.texture.tex,
mip_level: 0,
origin: wgpu::Origin3d {
x: atlas_offs.x,
y: atlas_offs.y,
z: 0,
},
},
wgpu::Extent3d {
width: tex_size[0] as u32,
height: tex_size[1] as u32,
depth_or_array_layers: 1,
},
);
// Update the memory mapped locals.
renderer.update_mapped(&mut mesh.locals, &[TerrainLocals::new(
*locals_buffer =
/* renderer.update_mapped(&mut mesh.locals, &[*/TerrainLocals::new(
Vec3::from(
response.pos.map2(VolGrid2d::<V>::chunk_size(), |e, sz| {
e as f32 * sz as f32
@ -1393,9 +1471,10 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
),
atlas_offs,
load_time,
)]);
)/*])*/;
Self::insert_chunk(&mut self.chunks, &mut self.atlas, response.pos, TerrainChunkData {
/* let locals = Arc::clone(&locals); */
Self::insert_chunk(&slowjob, &mut self.chunks, &mut self.atlas, response.pos, TerrainChunkData {
load_time,
opaque_model: mesh.opaque_model,
fluid_model: mesh.fluid_model,
@ -1404,7 +1483,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
light_map: mesh.light_map,
glow_map: mesh.glow_map,
sprite_instances,
locals: mesh.locals,
locals: /* mesh.locals */renderer.create_terrain_bound_locals(&locals, locals_offset),
visible: Visibility {
in_range: false,
in_frustum: false,
@ -1432,7 +1511,13 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
None => {},
}
}
drop(slowjob);
// Drop the memory mapping and unmap the locals.
drop(locals_buffer);
renderer.unmap_consts(&locals);
/* // TODO: Delay submission, don't just submit immediately out of convenience!
renderer.queue.submit(std::iter::once(encoder.finish())); */
self.command_buffers.push(encoder.finish());
}
drop(guard);
// Construct view frustum
@ -1588,18 +1673,25 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
// shadows at the same time.
let chunks = &self.chunks;
self.shadow_chunks
.retain(|(pos, chunk)| !chunks.contains_key(pos) && can_shadow_sun(*pos, chunk));
.drain_filter(|(pos, chunk)| chunks.contains_key(pos) || !can_shadow_sun(*pos, chunk))
.for_each(|(pos, chunk)| {
// Drop the chunk on another thread.
slowjob.spawn(&"TERRAIN_DROP", move || { drop(chunk); });
});
(visible_light_volume, visible_bounds)
} else {
// There's no daylight or no shadows, so there's no reason to keep any
// shadow chunks around.
self.shadow_chunks.clear();
let chunks = core::mem::replace(&mut self.shadow_chunks, Vec::new());
// Drop the chunks on another thread.
slowjob.spawn(&"TERRAIN_DROP", move || { drop(chunks); });
(Vec::new(), math::Aabr {
min: math::Vec2::zero(),
max: math::Vec2::zero(),
})
};
drop(slowjob);
drop(guard);
span!(guard, "Rain occlusion magic");
// Check if there is rain near the camera

View File

@ -1650,6 +1650,10 @@ impl PlayState for SessionState {
/// Returns the globals bind group owned by this session's scene.
///
/// This simply forwards to the scene's `global_bind_group` accessor.
fn globals_bind_group(&self) -> &GlobalsBindGroup {
    let scene = &self.scene;
    scene.global_bind_group()
}
/// Returns the command buffers produced by the scene's `pre_commands`.
///
/// This forwards directly to the scene; the session adds no command
/// buffers of its own here.
// NOTE(review): presumably these are submitted before the main frame
// rendering commands -- confirm against the caller.
fn pre_commands(&mut self) -> Vec<wgpu::CommandBuffer> { self.scene.pre_commands() }
/// Render the session to the screen.
///
/// This method should be called once per frame.