From 04bb1e32d9074a8656d0e4d3ea88a8c4717e6621 Mon Sep 17 00:00:00 2001 From: Joshua Yanovski Date: Tue, 2 Aug 2022 18:01:17 -0700 Subject: [PATCH] WIP: Glow lighting performance improvements. Messes up water somewhat and is incomplete for glow lighting, but should be a fair amount faster in general. Also reduces stuttering during chunk generation by moving atlas creation and "clearing" off the main thread (in fact, we don't even clear it except for the very first allocated terrain atlas). --- common/src/terrain/sprite.rs | 2 +- voxygen/src/lib.rs | 1 + voxygen/src/mesh/terrain.rs | 523 +++++++++++++++++++++++---------- voxygen/src/render/renderer.rs | 36 ++- voxygen/src/render/texture.rs | 32 ++ voxygen/src/scene/mod.rs | 2 +- voxygen/src/scene/terrain.rs | 179 ++++++++--- 7 files changed, 559 insertions(+), 216 deletions(-) diff --git a/common/src/terrain/sprite.rs b/common/src/terrain/sprite.rs index e88ed4b38c..54838b90f7 100644 --- a/common/src/terrain/sprite.rs +++ b/common/src/terrain/sprite.rs @@ -454,7 +454,7 @@ impl SpriteKind { } #[inline] - pub fn get_glow(&self) -> Option { + pub const fn get_glow(&self) -> Option { match self { SpriteKind::StreetLamp | SpriteKind::StreetLampTall => Some(24), SpriteKind::Ember => Some(20), diff --git a/voxygen/src/lib.rs b/voxygen/src/lib.rs index a9d1d0d0cb..7df571364f 100644 --- a/voxygen/src/lib.rs +++ b/voxygen/src/lib.rs @@ -10,6 +10,7 @@ once_cell, stmt_expr_attributes, trait_alias, + type_alias_impl_trait, option_get_or_insert_default, map_try_insert, slice_as_chunks, diff --git a/voxygen/src/mesh/terrain.rs b/voxygen/src/mesh/terrain.rs index 949b6bb7ad..f1ad3e09ec 100644 --- a/voxygen/src/mesh/terrain.rs +++ b/voxygen/src/mesh/terrain.rs @@ -33,39 +33,83 @@ pub const SUNLIGHT: u8 = 24; pub const SUNLIGHT_INV: f32 = 1.0 / SUNLIGHT as f32; pub const MAX_LIGHT_DIST: i32 = SUNLIGHT as i32; -fn calc_light + ReadVol + Debug>( +/// Working around lack of existential types. 
+/// +/// See [https://github.com/rust-lang/rust/issues/42940] +type CalcLightFn = impl Fn(Vec3) -> f32 + 'static + Send + Sync; + +#[inline(always)] +fn flat_get<'a>(flat: &'a Vec, w: i32, h: i32, d: i32) -> impl Fn(Vec3) -> Block + 'a { + let hd = h * d; + let flat = &flat[0..(w * hd) as usize]; + #[inline(always)] move |Vec3 { x, y, z }| { + // z can range from -1..range.size().d + 1 + let z = z + 1; + flat[(x * hd + y * d + z) as usize] + /* match flat.get((x * hd + y * d + z) as usize).copied() { + Some(b) => b, + None => panic!("x {} y {} z {} d {} h {}", x, y, z, d, h), + } */ + } +} + +fn calc_light<'a, + V: RectRasterableVol + ReadVol + Debug + 'static, + I: Iterator, u8)>, + /* F: /*for<'x> */for<'a> fn(&'a Vec) -> G, */ + /* G: /*[&'x &'a (); 0], */Fn(Vec3) -> Block, */ +>( is_sunlight: bool, // When above bounds default_light: u8, bounds: Aabb, - vol: &VolGrid2d, - lit_blocks: impl Iterator, u8)>, -) -> impl Fn(Vec3) -> f32 + 'static + Send + Sync { - /* span!(_guard, "calc_light"); + range: Aabb, + vol: &'a VolGrid2d, + lit_blocks: I, + flat: &'a Vec, + (w, h, d): (i32, i32, i32) +) -> CalcLightFn { + span!(_guard, "calc_light"); const UNKNOWN: u8 = 255; const OPAQUE: u8 = 254; let outer = Aabb { - min: bounds.min - Vec3::new(SUNLIGHT as i32, SUNLIGHT as i32, 1), - max: bounds.max + Vec3::new(SUNLIGHT as i32, SUNLIGHT as i32, 1), + min: bounds.min/* - Vec3::new(SUNLIGHT as i32, SUNLIGHT as i32, 1) */ - Vec3::new(0, 0, 1), + max: bounds.max/* + Vec3::new(SUNLIGHT as i32, SUNLIGHT as i32, 1) */ + Vec3::new(0, 0, 1), }; - let mut vol_cached = vol.cached(); + let range_delta = outer.min - range.min; - let mut light_map = vec![UNKNOWN; outer.size().product() as usize]; + /* let mut vol_cached = vol.cached(); */ + + let mut light_map_ = vec![UNKNOWN; outer.size().product() as usize]; + let (w_, h_, d_) = outer.clone().size().into_tuple(); + let wh_ = w_ * h_; + let light_map = &mut light_map_[0..(wh_ * d_) as usize]; let lm_idx = { - let (w, h, _) = 
outer.clone().size().into_tuple(); - move |x, y, z| (w * h * z + h * x + y) as usize + #[inline(always)] move |x, y, z| { + (wh_ * z + h_ * x + y) as usize + } }; - // Light propagation queue + /* // Light propagation queue let mut prop_que = lit_blocks .map(|(pos, light)| { let rpos = pos - outer.min; light_map[lm_idx(rpos.x, rpos.y, rpos.z)] = light.min(SUNLIGHT); // Brightest light (rpos.x as u8, rpos.y as u8, rpos.z as u16) }) - .collect::>(); - // Start sun rays + .collect::>(); */ + let mut prop_que = vec![Vec::new(); usize::from(SUNLIGHT) + 1]; + let mut prop_queue = &mut prop_que[..usize::from(SUNLIGHT) + 1]; + lit_blocks.for_each(|(pos, light)| { + /* println!("Lighting {:?}: {:?}", pos, light); */ + let rpos = pos - outer.min; + let glow = light.min(SUNLIGHT); + light_map[lm_idx(rpos.x, rpos.y, rpos.z)] = glow; // Brightest light + prop_que[usize::from(glow)].push((rpos.x as u8, rpos.y as u8, rpos.z as u16)); + }); + + /* // Start sun rays if is_sunlight { for x in 0..outer.size().w { for y in 0..outer.size().h { @@ -89,110 +133,150 @@ fn calc_light + ReadVol + Debug>( } } } - } + } */ // Determines light propagation - let propagate = |src: u8, + let flat_get = flat_get(flat, w, h, d); + let propagate = #[inline(always)] |src: u8, dest: &mut u8, pos: Vec3, - prop_que: &mut VecDeque<_>, - vol: &mut CachedVolGrid2d| { - if *dest != OPAQUE { - if *dest == UNKNOWN { - if vol + prop_que: &mut /*VecDeque*/Vec<_>, + /* vol: &mut CachedVolGrid2d */| { + let dst = *dest; + /* if dst != OPAQUE */{ + if dst < src || dst == UNKNOWN /* { + if */&& /* vol .get(outer.min + pos) .ok() - .map_or(false, |b| b.is_fluid()) + .map_or(false, |b| b.is_fluid()) */ + flat_get(/*[], */pos + range_delta).is_fluid() { - *dest = src.saturating_sub(1); + /* *dest = src.saturating_sub(1); */ + *dest = src; // Can't propagate further - if *dest > 1 { - prop_que.push_back((pos.x as u8, pos.y as u8, pos.z as u16)); + if /* *dest */src > 1 { + prop_que./*push_back*/push((pos.x as u8, pos.y 
as u8, pos.z as u16)); } - } else { + }/* else { *dest = OPAQUE; - } - } else if *dest < src.saturating_sub(1) { - *dest = src - 1; + } */ + /*} else if *dest < src/* .saturating_sub(1) */ { + *dest = src/* - 1 */; // Can't propagate further - if *dest > 1 { - prop_que.push_back((pos.x as u8, pos.y as u8, pos.z as u16)); + if /* *dest */src > 1 { + prop_que./*push_back*/push((pos.x as u8, pos.y as u8, pos.z as u16)); } - } + } */ } }; // Propagate light - while let Some(pos) = prop_que.pop_front() { - let pos = Vec3::new(pos.0 as i32, pos.1 as i32, pos.2 as i32); - let light = light_map[lm_idx(pos.x, pos.y, pos.z)]; + // + // NOTE: We start at 2 because starting at 1 would propagate light of brightness 0 to + // neighbors. + (2..usize::from(SUNLIGHT) + 1).rev().for_each(|light| { + let (front, back) = prop_que.split_at_mut(light); + let prop_que = front.last_mut().expect("Split at least at index 2, so front must have at least 1 element."); + let front = back.first_mut().expect("Split at most at SUNLIGHT, and array has length SUNLIGHT+1, so back must have at least 1 element."); + let light = light as u8; + // NOTE: Always in bounds and ≥ 1, since light ≥ 2. + let new_light = light - 1; + /* println!("Light: {:?}", light); */ + /* while let Some(pos) = prop_que.pop_front() */front.iter().for_each(|pos| { + let pos = Vec3::new(pos.0 as i32, pos.1 as i32, pos.2 as i32); + let light_ = light_map[lm_idx(pos.x, pos.y, pos.z)]; + if light != light_ { + // This block got modified before it could emit anything. 
+ return; + } + /* println!("Pos: {:?}", pos); */ - // Up - // Bounds checking - if pos.z + 1 < outer.size().d { - propagate( - light, - light_map.get_mut(lm_idx(pos.x, pos.y, pos.z + 1)).unwrap(), - Vec3::new(pos.x, pos.y, pos.z + 1), - &mut prop_que, - &mut vol_cached, - ) - } - // Down - if pos.z > 0 { - propagate( - light, - light_map.get_mut(lm_idx(pos.x, pos.y, pos.z - 1)).unwrap(), - Vec3::new(pos.x, pos.y, pos.z - 1), - &mut prop_que, - &mut vol_cached, - ) - } - // The XY directions - if pos.y + 1 < outer.size().h { - propagate( - light, - light_map.get_mut(lm_idx(pos.x, pos.y + 1, pos.z)).unwrap(), - Vec3::new(pos.x, pos.y + 1, pos.z), - &mut prop_que, - &mut vol_cached, - ) - } - if pos.y > 0 { - propagate( - light, - light_map.get_mut(lm_idx(pos.x, pos.y - 1, pos.z)).unwrap(), - Vec3::new(pos.x, pos.y - 1, pos.z), - &mut prop_que, - &mut vol_cached, - ) - } - if pos.x + 1 < outer.size().w { - propagate( - light, - light_map.get_mut(lm_idx(pos.x + 1, pos.y, pos.z)).unwrap(), - Vec3::new(pos.x + 1, pos.y, pos.z), - &mut prop_que, - &mut vol_cached, - ) - } - if pos.x > 0 { - propagate( - light, - light_map.get_mut(lm_idx(pos.x - 1, pos.y, pos.z)).unwrap(), - Vec3::new(pos.x - 1, pos.y, pos.z), - &mut prop_que, - &mut vol_cached, - ) - } - } + // Up + // Bounds checking + // NOTE: Array accesses are all safe even if they are technically out of bounds, + // because we have margin on all sides and the light sources only come from within the + // proper confines of the volume. This allows us to fetch them before the if + // statements. 
+ { + /* let light_map = light_map.as_mut_ptr(); + let z_up = &mut *light_map.offset(lm_idx(pos.x, pos.y, pos.z + 1) as isize); + let z_down = &mut *light_map.offset(lm_idx(pos.x, pos.y, pos.z - 1) as isize); + let y_up = &mut *light_map.offset(lm_idx(pos.x, pos.y + 1, pos.z) as isize); + let y_down = &mut *light_map.offset(lm_idx(pos.x, pos.y - 1, pos.z) as isize); + let x_up = &mut *light_map.offset(lm_idx(pos.x + 1, pos.y, pos.z) as isize); + let x_down = &mut *light_map.offset(lm_idx(pos.x - 1, pos.y, pos.z) as isize); */ + if pos.z + 1 < outer.size().d { + let z_up = &mut light_map[lm_idx(pos.x, pos.y, pos.z + 1)]; + propagate( + new_light, + z_up, + Vec3::new(pos.x, pos.y, pos.z + 1), + /*&mut */prop_que, + /* &mut vol_cached, */ + ) + } + // Down + if pos.z > 0 { + let z_down = &mut light_map[lm_idx(pos.x, pos.y, pos.z - 1)]; + propagate( + new_light, + z_down, + Vec3::new(pos.x, pos.y, pos.z - 1), + /*&mut */prop_que, + /* &mut vol_cached, */ + ) + } + // The XY directions + if pos.y + 1 < outer.size().h { + let y_up = &mut light_map[lm_idx(pos.x, pos.y + 1, pos.z)]; + propagate( + new_light, + y_up, + Vec3::new(pos.x, pos.y + 1, pos.z), + /*&mut */prop_que, + /* &mut vol_cached, */ + ) + } + if pos.y > 0 { + let y_down = &mut light_map[lm_idx(pos.x, pos.y - 1, pos.z)]; + propagate( + new_light, + y_down, + Vec3::new(pos.x, pos.y - 1, pos.z), + /*&mut */prop_que, + /* &mut vol_cached, */ + ) + } + if pos.x + 1 < outer.size().w { + let x_up = &mut light_map[lm_idx(pos.x + 1, pos.y, pos.z)]; + propagate( + new_light, + x_up, + Vec3::new(pos.x + 1, pos.y, pos.z), + /*&mut */prop_que, + /* &mut vol_cached, */ + ) + } + if pos.x > 0 { + let x_down = &mut light_map[lm_idx(pos.x - 1, pos.y, pos.z)]; + propagate( + new_light, + x_down, + Vec3::new(pos.x - 1, pos.y, pos.z), + /*&mut */prop_que, + /* &mut vol_cached, */ + ) + } + } + }); + }); let min_bounds = Aabb { - min: bounds.min - 1, - max: bounds.max + 1, + min: bounds.min - Vec3::unit_z(), + max: bounds.max + 
Vec3::unit_z(), }; - // Minimise light map to reduce duplication. We can now discard light info + /* // Minimise light map to reduce duplication. We can now discard light info // for blocks outside of the chunk borders. let mut light_map2 = vec![UNKNOWN; min_bounds.size().product() as usize]; let lm_idx2 = { @@ -208,13 +292,14 @@ fn calc_light + ReadVol + Debug>( } } - drop(light_map); */ + drop(light_map_); */ + let light_map2 = light_map_; - move |wpos| { - if is_sunlight { 1.0 } else { 0.0 } - /* let pos = wpos - min_bounds.min; + #[inline(always)] move |wpos| { + if is_sunlight { return 1.0 }/* else { 0.0 } */ + let pos = wpos - min_bounds.min; let l = light_map2 - .get(lm_idx2(pos.x, pos.y, pos.z)) + .get(/*lm_idx2*/lm_idx(pos.x, pos.y, pos.z)) .copied() .unwrap_or(default_light); @@ -222,7 +307,7 @@ fn calc_light + ReadVol + Debug>( l as f32 * SUNLIGHT_INV } else { 0.0 - } */ + } } } @@ -230,7 +315,7 @@ fn calc_light + ReadVol + Debug>( #[inline(always)] pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + 'static>( vol: &'a VolGrid2d, - (range, max_texture_size, _boi): (Aabb, Vec2, &'a BlocksOfInterest), + (range, max_texture_size, boi): (Aabb, Vec2, &'a BlocksOfInterest), ) -> MeshGen< TerrainVertex, FluidVertex, @@ -248,51 +333,94 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' "<&VolGrid2d as Meshable<_, _>>::generate_mesh" ); - // Find blocks that should glow - // TODO: Search neighbouring chunks too! 
- // let glow_blocks = boi.lights - // .iter() - // .map(|(pos, glow)| (*pos + range.min.xy(), *glow)); - /* DefaultVolIterator::new(vol, range.min - MAX_LIGHT_DIST, range.max + MAX_LIGHT_DIST) - .filter_map(|(pos, block)| block.get_glow().map(|glow| (pos, glow))); */ - - let mut glow_blocks = Vec::new(); - - /* // TODO: This expensive, use BlocksOfInterest instead - let mut volume = vol.cached(); - for x in -MAX_LIGHT_DIST..range.size().w + MAX_LIGHT_DIST { - for y in -MAX_LIGHT_DIST..range.size().h + MAX_LIGHT_DIST { - for z in -1..range.size().d + 1 { - let wpos = range.min + Vec3::new(x, y, z); - volume - .get(wpos) - .ok() - .and_then(|b| b.get_glow()) - .map(|glow| glow_blocks.push((wpos, glow))); - } - } - } */ - - // Calculate chunk lighting (sunlight defaults to 1.0, glow to 0.0) - let light = calc_light(true, SUNLIGHT, range, vol, core::iter::empty()); - let glow = calc_light(false, 0, range, vol, glow_blocks.into_iter()); - let mut opaque_limits = None::; let mut fluid_limits = None::; let mut air_limits = None::; let mut flat; - let flat_get = { + let (w, h, d) = range.size().into_tuple(); + // z can range from -1..range.size().d + 1 + let d = d + 2; + { span!(_guard, "copy to flat array"); - let (w, h, d) = range.size().into_tuple(); - // z can range from -1..range.size().d + 1 - let d = d + 2; + let hd = h * d; /*let flat = */{ - let mut volume = vol.cached(); + /* let mut volume = vol.cached(); */ const AIR: Block = Block::air(common::terrain::sprite::SpriteKind::Empty); // TODO: Once we can manage it sensibly, consider using something like // Option instead of just assuming air. 
- /*let mut */flat = vec![AIR; (w * h * d) as usize]; - let mut i = 0; + /*let mut */flat = vec![AIR; (w * /*h * d*/hd) as usize] + /* Vec::with_capacity((w * /*h * d*/hd) as usize) */ + ; + let flat = &mut flat/*.spare_capacity_mut()*/[0..(w * hd) as usize]; + /* /*volume*/vol.iter().for_each(|(chunk_key, chunk)| { + let corner = chunk.key_pos(chunk_key); + }); */ + let flat_range = Aabb { + min: range.min - Vec3::new(0, 0, 1), + max: range.max - Vec3::new(1, 1, 0), + }; + let min_chunk_key = vol.pos_key(flat_range.min); + let max_chunk_key = vol.pos_key(flat_range.max); + (min_chunk_key.x..max_chunk_key.x + 1).for_each(|key_x| { + (min_chunk_key.y..max_chunk_key.y + 1).for_each(|key_y| { + let key = Vec2::new(key_x, key_y); + let chonk = vol.get_key(key).expect("All keys in range must have chonks."); + /* vol.iter().for_each(|(key, chonk)| { */ + let chonk = &*chonk; + let pos = vol.key_pos(key); + // Calculate intersection of Aabb and this chunk + // TODO: should we do this more implicitly as part of the loop + // TODO: this probably has to be computed in the chunk.for_each_in() as well + // maybe remove here? 
+ let intersection_ = flat_range.intersection(Aabb { + min: pos.with_z(i32::MIN), + // -1 here since the Aabb is inclusive and chunk_offs below will wrap it if + // it's outside the range of the chunk + max: (pos + VolGrid2d::::chunk_size().map(|e| e as i32) - 1).with_z(i32::MAX), + }); + + // Map intersection into chunk coordinates + let x_diff = intersection_.min.x - flat_range.min.x; + let y_diff = intersection_.min.y - flat_range.min.y; + let z_diff = -range.min.z; + let y_rem = flat_range.max.y - intersection_.max.y; + let x_off = ((y_diff + y_rem) * d) as usize; + + let intersection = Aabb { + min: VolGrid2d::::chunk_offs(intersection_.min) + Vec3::new(0, 0, z_diff), + max: VolGrid2d::::chunk_offs(intersection_.max) + Vec3::new(1, 1, z_diff + 1), + }; + + /* [[0 ..1]; [0 ..1]; [0..d]] + [[0 ..1]; [1 ..h-1]; [0..d]] + [[0 ..1]; [h-1..h]; [0..d]] + // How to handle middle? + // Answer: + [[w-1..w]; [0 ..1]; [0..d]] + [[w-1..w]; [1 ..h-1]; [0..d]] + [[w-1..w]; [h-1..h]; [0..d]] + + [1,1; d] */ + + let mut i = (x_diff * hd + y_diff * d) as usize; + let hd_ = (intersection.size().h * d) as usize; + // dbg!(pos, intersection_, intersection, range, flat_range, x_diff, y_diff, z_diff, y_rem, x_off, i); + (intersection.min.x..intersection.max.x).for_each(|x| { + let flat = &mut flat[i..i + /*intersection.size().y * intersection.size().z*/hd_]; + flat.chunks_exact_mut(d as usize).enumerate().for_each(|(y, flat)| { + let y = y as i32 + intersection.min.y; + /* (intersection.min.y..intersection.max.y).for_each(|y| { */ + /* let mut i = (x * hd + y * d) as usize; */ + /* chonk.for_each_in(intersection, |pos_offset, block| { + pos_offset.z += z_diff; + }); */ + flat.into_iter().enumerate().for_each(|(z, flat)| { + let z = z as i32 + intersection.min.z; + /* (intersection.min.z..intersection.max.z).for_each(|z| { */ + /* let mut i = ((x_diff + (x - intersection.min.x)) * hd + (y_diff + (y - intersection.min.y)) * d + (z - intersection.min.z)) as usize; */ + /* 
vol.iter(flat_range, |wpos, block| { + let z = wpos.z - range.min.z; */ + /* let mut i = 0; for x in 0..range.size().w { for y in 0..range.size().h { for z in -1..range.size().d + 1 { @@ -302,7 +430,10 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' .map(|b| *b) // TODO: Replace with None or some other more reasonable value, // since it's not clear this will work properly with liquid. - .unwrap_or(AIR); + .unwrap_or(AIR); */ + /* if let Ok(&block) = chonk.get(Vec3::new(x, y, z - z_diff)) */ + let block = chonk.get(Vec3::new(x, y, z - z_diff)).copied().unwrap_or(AIR); + { if block.is_opaque() { opaque_limits = opaque_limits .map(|l| l.including(z)) @@ -317,30 +448,47 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' .map(|l| l.including(z)) .or_else(|| Some(Limits::from_value(z))); }; - flat[i] = block; - i += 1; + /*flat[i] = block*//*unsafe { flat.get_unchecked_mut(i) }*//*flat[i].write(block);*/ + /* flat.write(block); */ + *flat = block; } + /* i += 1; */ + /* } } - } - /* flat */ - }; - - let hd = h * d; - let flat = &flat[0..(w * hd) as usize]; - #[inline(always)] move |Vec3 { x, y, z }| { - // z can range from -1..range.size().d + 1 - let z = z + 1; - flat[(x * hd + y * d + z) as usize] - /* match flat.get((x * hd + y * d + z) as usize).copied() { - Some(b) => b, - None => panic!("x {} y {} z {} d {} h {}", x, y, z, d, h), } */ + /* flat */ + /* }); */ + /* }); */ + }); + // i += d; + /* }); */ + }); + // i += x_off; + i += hd as usize; + }); + /* }); */ + }); + }); } - }; + /* unsafe { flat.set_len((w * hd) as usize); } */ + } // Constrain iterated area + let mut opaque_mesh = Mesh::new(); + let mut fluid_mesh = Mesh::new(); let (z_start, z_end) = match (air_limits, fluid_limits, opaque_limits) { - (Some(air), Some(fluid), Some(opaque)) => air.three_way_intersection(fluid, opaque), + (Some(air), Some(fluid), Some(opaque)) => { + let air_fluid = air.intersection(fluid); + if let Some(intersection) = 
air_fluid.filter(|limits| limits.min + 1 == limits.max) { + // If there is a planar air-fluid boundary, just draw it directly and avoid + // redundantly meshing the whole fluid volume, then intersect the ground-fluid + // and ground-air meshes to make sure we don't miss anything. + either_with(air.intersection(opaque), fluid.intersection(opaque), Limits::union) + } else { + // Otherwise, do a normal three-way intersection. + air.three_way_intersection(fluid, opaque) + } + }, (Some(air), Some(fluid), None) => air.intersection(fluid), (Some(air), None, Some(opaque)) => air.intersection(opaque), (None, Some(fluid), Some(opaque)) => fluid.intersection(opaque), @@ -358,6 +506,62 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' (start, end) }); + // Calculate chunk lighting (sunlight defaults to 1.0, glow to 0.0) + let mut glow_range = range; + if let Some(opaque) = opaque_limits { + glow_range.min.z = z_start.max(opaque.min) + range.min.z; + glow_range.max.z = (z_end.min(opaque.max) + range.min.z).max(glow_range.min.z); + } + // Find blocks that should glow + // TODO: Search neighbouring chunks too! + let mut glow_block_min = glow_range.max.z; + let mut glow_block_max = glow_range.min.z; + let mut glow_blocks = boi.lights + .iter() + .map(|(pos, glow)| { + let pos_z = pos.z.clamped(glow_range.min.z, glow_range.max.z); + let glow = (i32::from(*glow) - (pos_z - pos.z).abs()).max(0); + glow_block_min = glow_block_min.min(pos_z - glow); + glow_block_max = glow_block_max.max(pos_z + glow); + ((*pos + glow_range.min.xy()).with_z(pos_z), glow as u8) + }) + // FIXME: Why is Rust forcing me to collect to Vec here? 
+ .collect::>(); + glow_range.min.z = glow_block_min.clamped(glow_range.min.z, glow_range.max.z); + glow_range.max.z = glow_block_max.clamped(glow_range.min.z, glow_range.max.z); + /* if glow_range.min.z != glow_range.max.z { + println!("{:?}", glow_range); + } */ + + let mut light_range = glow_range; + light_range.min.z = light_range.max.z; + + /* // Sort glowing blocks in decreasing order by glow strength. This makes it somewhat less + // likely that a smaller glow will have to be drawn. + glow_blocks.sort_unstable_by_key(|(_, glow)| core::cmp::Reverse(*glow)); */ + + /* DefaultVolIterator::new(vol, range.min - MAX_LIGHT_DIST, range.max + MAX_LIGHT_DIST) + .filter_map(|(pos, block)| block.get_glow().map(|glow| (pos, glow))); */ + + /* let mut glow_blocks = Vec::new(); */ + + /* // TODO: This expensive, use BlocksOfInterest instead + let mut volume = vol.cached(); + for x in -MAX_LIGHT_DIST..range.size().w + MAX_LIGHT_DIST { + for y in -MAX_LIGHT_DIST..range.size().h + MAX_LIGHT_DIST { + for z in -1..range.size().d + 1 { + let wpos = range.min + Vec3::new(x, y, z); + volume + .get(wpos) + .ok() + .and_then(|b| b.get_glow()) + .map(|glow| glow_blocks.push((wpos, glow))); + } + } + } */ + let light = calc_light(true, SUNLIGHT, light_range, range, vol, core::iter::empty(), &flat, (w, h, d)); + let glow = calc_light(false, 0, glow_range, range, vol, glow_blocks.into_iter(), &flat, (w, h, d)); + let max_size = max_texture_size; assert!(z_end >= z_start); let greedy_size = Vec3::new(range.size().w - 2, range.size().h - 2, z_end - z_start + 1); @@ -375,6 +579,7 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' let greedy_size_cross = Vec3::new(greedy_size.x - 1, greedy_size.y - 1, greedy_size.z); let draw_delta = Vec3::new(1, 1, z_start); + let flat_get = flat_get(&flat, w, h, d); let get_light = #[inline(always)] |_: &mut (), pos: Vec3| { if flat_get(pos).is_opaque() { 0.0 @@ -400,8 +605,6 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + 
Debug + ' let mut greedy = GreedyMesh::::new(max_size, greedy::general_config()); - let mut opaque_mesh = Mesh::new(); - let mut fluid_mesh = Mesh::new(); greedy.push(GreedyConfig { data: (), draw_delta, diff --git a/voxygen/src/render/renderer.rs b/voxygen/src/render/renderer.rs index 804b8aacb9..047f1199b6 100644 --- a/voxygen/src/render/renderer.rs +++ b/voxygen/src/render/renderer.rs @@ -1378,15 +1378,20 @@ impl Renderer { } /// Create a new raw texture. - pub fn create_texture_raw( + /// + /// NOTE: This is done lazily--the returned function must be invoked to actually create the + /// texture. This allows creating the texture on another thread. + pub fn create_texture_raw<'a>( &mut self, - texture_info: &wgpu::TextureDescriptor, - view_info: &wgpu::TextureViewDescriptor, - sampler_info: &wgpu::SamplerDescriptor, - ) -> Texture { - let texture = Texture::new_raw(&self.device, texture_info, view_info, sampler_info); - texture.clear(&self.queue); // Needs to be fully initialized for partial writes to work on Dx12 AMD - texture + texture_info: wgpu::TextureDescriptor<'a>, + view_info: wgpu::TextureViewDescriptor<'a>, + sampler_info: wgpu::SamplerDescriptor<'a>, + ) -> impl FnOnce() -> Texture + Send + Sync + 'a { + let device = Arc::clone(&self.device); + move || { + let texture = Texture::new_raw(&device, &texture_info, &view_info, &sampler_info); + texture + } } /// Create a new texture from the provided image. @@ -1429,6 +1434,21 @@ impl Renderer { texture.update(&self.queue, offset, size, bytemuck::cast_slice(data)) } + /// Clears the texture data to 0. + pub fn clear_texture( + &mut self, + texture: &Texture, /* */ + ) { + texture.clear(&self.queue) + } + + /// Replaces the destination texture with the contents of the source texture. + /// + /// The source size should at least fit within the destination texture's size. 
+ pub fn replace_texture(&mut self, dest: &Texture, source: &Texture) { + dest.replace(&self.device, &self.queue, source); + } + /// Queue to obtain a screenshot on the next frame render pub fn create_screenshot( &mut self, diff --git a/voxygen/src/render/texture.rs b/voxygen/src/render/texture.rs index 39db02e2ba..c69eafbe87 100644 --- a/voxygen/src/render/texture.rs +++ b/voxygen/src/render/texture.rs @@ -190,6 +190,38 @@ impl Texture { self.update(queue, [0, 0], [size.width, size.height], &zeros); } + /// Replaces this texture with the contents of another texture. + /// + /// The source size should at least fit within this texture's size. + pub fn replace(&self, device: &wgpu::Device, queue: &wgpu::Queue, texture: &Self) { + let mut encoder = device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("Replace the texture buffer"), + }); + + // Copy image + encoder.copy_texture_to_texture( + wgpu::ImageCopyTexture { + texture: &texture.tex, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + }, + wgpu::ImageCopyTexture { + texture: &self.tex, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + }, + wgpu::Extent3d { + width: texture.size.width, + height: texture.size.height, + depth_or_array_layers: 1, + }, + ); + + // TODO: Delay submission, don't just submit immediately out of convenience! + queue.submit(std::iter::once(encoder.finish())); + } + /// Update a texture with the given data (used for updating the glyph cache /// texture). 
pub fn update(&self, queue: &wgpu::Queue, offset: [u32; 2], size: [u32; 2], data: &[u8]) { diff --git a/voxygen/src/scene/mod.rs b/voxygen/src/scene/mod.rs index a194856953..c4b6e4f556 100644 --- a/voxygen/src/scene/mod.rs +++ b/voxygen/src/scene/mod.rs @@ -295,7 +295,7 @@ impl Scene { let globals_bind_group = renderer.bind_globals(&data, lod.get_data()); - let terrain = Terrain::new(renderer, &data, lod.get_data(), sprite_render_context); + let terrain = Terrain::new(client, renderer, &data, lod.get_data(), sprite_render_context); Self { data, diff --git a/voxygen/src/scene/terrain.rs b/voxygen/src/scene/terrain.rs index dd7e370221..5e7815c844 100644 --- a/voxygen/src/scene/terrain.rs +++ b/voxygen/src/scene/terrain.rs @@ -12,7 +12,7 @@ use crate::{ pipelines::{self, ColLights}, ColLightInfo, FirstPassDrawer, FluidVertex, GlobalModel, Instances, LodData, Mesh, Model, RenderError, Renderer, SpriteGlobalsBindGroup, SpriteInstance, SpriteVertex, SpriteVerts, - TerrainLocals, TerrainShadowDrawer, TerrainVertex, SPRITE_VERT_PAGE_SIZE, + TerrainLocals, TerrainShadowDrawer, TerrainVertex, Texture, SPRITE_VERT_PAGE_SIZE, }, }; @@ -20,6 +20,7 @@ use super::{ camera::{self, Camera}, math, SceneData, RAIN_THRESHOLD, }; +use client::Client; use common::{ assets::{self, AssetExt, DotVoxAsset}, figure::Segment, @@ -97,6 +98,7 @@ pub struct TerrainChunkData { can_shadow_point: bool, can_shadow_sun: bool, z_bounds: (f32, f32), + shadow_z_bounds: (f32, f32), frustum_last_plane_index: u8, } @@ -112,6 +114,7 @@ struct ChunkMeshState { /// Just the mesh part of a mesh worker response. pub struct MeshWorkerResponseMesh { z_bounds: (f32, f32), + shadow_z_bounds: (f32, f32), opaque_mesh: Mesh, fluid_mesh: Mesh, col_lights_info: ColLightInfo, @@ -263,6 +266,11 @@ fn mesh_worker + RectRasterableVol + ReadVol + Debug + ' mesh = Some(MeshWorkerResponseMesh { // TODO: Take sprite bounds into account somehow? 
z_bounds: (bounds.min.z, bounds.max.z), + // NOTE: It should be pretty much impossible to see *just* the edge of an object in + // another chunk without seeing any of the actual voxel associated with that object, so + // we can ignore such cases for the purposes of determining a shadow bounding box (but + // not visibility, unfortunately). + shadow_z_bounds: ((chunk.get_min_z() as f32).max(bounds.min.z), (chunk.get_max_z() as f32).min(bounds.max.z)), opaque_mesh, fluid_mesh, col_lights_info, @@ -406,6 +414,8 @@ pub struct Terrain { // workers. mesh_send_tmp: channel::Sender, mesh_recv: channel::Receiver, + new_atlas_tx: channel::Sender<(AtlasAllocator, Texture)>, + new_atlas_rx: channel::Receiver<(AtlasAllocator, Texture)>, mesh_todo: HashMap, ChunkMeshState>, mesh_todos_active: Arc, mesh_recv_overflow: f32, @@ -614,6 +624,7 @@ impl SpriteRenderContext { impl Terrain { pub fn new( + client: &Client, renderer: &mut Renderer, global_model: &GlobalModel, lod_data: &LodData, @@ -623,8 +634,29 @@ impl Terrain { // with worker threads that are meshing chunks. let (send, recv) = channel::unbounded(); + // Number of background atlases to have prepared at a time. It is unlikely we would ever + // want to change this value from 1, unless rendering gets super speedy or our atlas count + // starts exploding. This should never be set to 0 unless you are okay with blocking every + // single atlas (this technically improves memory utilization, but the tradeoff can result + // in really bad stuttering; if this becomes a desirable configuration, we can rework the + // way the rest of Terrain::maintain works so that it can deal with no atlas being + // available). + const EXTRA_ATLAS_COUNT: usize = 1; + + // Create a second mpsc pair for offloading atlas allocation to a second thread. This way, + // a second thread is usually ready to produce a new atlas the moment we ask for it, so we + // avoid waiting longer than necessary. 
The channel holds just EXTRA_ATLAS_COUNT + // atlases at a time because we always (except at the very beginning) start creating a new + // atlas at the same time that we receive a previous one, and at the beginning we create + // 1 + EXTRA_ATLAS_COUNT atlases (one of which we immediately consume), so we can + // never have more than one atlas in transit at a time. + let (mut new_atlas_tx, mut new_atlas_rx) = channel::bounded(EXTRA_ATLAS_COUNT); + + // We start by creating an extra atlas, ensuring that we are always building one more atlas + // than we currently need in a background job. let (atlas, col_lights) = - Self::make_atlas(renderer).expect("Failed to create atlas texture"); + Self::make_atlas(client, renderer, &mut new_atlas_tx, &mut new_atlas_rx, None, EXTRA_ATLAS_COUNT) + .expect("Failed to create atlas texture"); Self { atlas, @@ -633,6 +665,8 @@ impl Terrain { shadow_chunks: Vec::default(), mesh_send_tmp: send, mesh_recv: recv, + new_atlas_tx, + new_atlas_rx, mesh_todo: HashMap::default(), mesh_todos_active: Arc::new(AtomicU64::new(0)), mesh_recv_overflow: 0.0, @@ -648,53 +682,93 @@ impl Terrain { } } + /// `count` is the number of extra atlases to attempt to create (we always try to write one and + /// read one when count is 0, and we can create extra atlases as count moves higher). + /// + /// `old_texture` is an optional argument representing an old texture with the same size and + /// (ideally) format as the new atlas. 
fn make_atlas( + client: &Client, renderer: &mut Renderer, - ) -> Result<(AtlasAllocator, ColLights), RenderError> { + new_atlas_tx: &mut channel::Sender<(AtlasAllocator, Texture)>, + new_atlas_rx: &mut channel::Receiver<(AtlasAllocator, Texture)>, + old_texture: Option<&Texture>, + count: usize, + ) -> Result<(AtlasAllocator, ColLights), channel::RecvError> { span!(_guard, "make_atlas", "Terrain::make_atlas"); let max_texture_size = renderer.max_texture_size(); let atlas_size = guillotiere::Size::new(max_texture_size as i32, max_texture_size as i32); - let atlas = AtlasAllocator::with_options(atlas_size, &guillotiere::AllocatorOptions { - // TODO: Verify some good empirical constants. - small_size_threshold: 128, - large_size_threshold: 1024, - ..guillotiere::AllocatorOptions::default() - }); - let texture = renderer.create_texture_raw( - &wgpu::TextureDescriptor { - label: Some("Atlas texture"), - size: wgpu::Extent3d { - width: max_texture_size, - height: max_texture_size, - depth_or_array_layers: 1, + (0..=count).for_each(|_| { + let atlas = AtlasAllocator::with_options(atlas_size, &guillotiere::AllocatorOptions { + // TODO: Verify some good empirical constants. + small_size_threshold: 128, + large_size_threshold: 1024, + ..guillotiere::AllocatorOptions::default() + }); + let new_atlas_tx = new_atlas_tx.clone(); + let texture_fn = renderer.create_texture_raw( + wgpu::TextureDescriptor { + label: Some("Atlas texture"), + size: wgpu::Extent3d { + width: max_texture_size, + height: max_texture_size, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + // NOTE: COPY_SRC is used for the hack we use to work around zeroing, it + // shouldn't be needed otherwise. 
+ usage: wgpu::TextureUsage::COPY_SRC | wgpu::TextureUsage::COPY_DST | wgpu::TextureUsage::SAMPLED, }, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba8Unorm, - usage: wgpu::TextureUsage::COPY_DST | wgpu::TextureUsage::SAMPLED, - }, - &wgpu::TextureViewDescriptor { - label: Some("Atlas texture view"), - format: Some(wgpu::TextureFormat::Rgba8Unorm), - dimension: Some(wgpu::TextureViewDimension::D2), - aspect: wgpu::TextureAspect::All, - base_mip_level: 0, - mip_level_count: None, - base_array_layer: 0, - array_layer_count: None, - }, - &wgpu::SamplerDescriptor { - label: Some("Atlas sampler"), - address_mode_u: wgpu::AddressMode::ClampToEdge, - address_mode_v: wgpu::AddressMode::ClampToEdge, - address_mode_w: wgpu::AddressMode::ClampToEdge, - mag_filter: wgpu::FilterMode::Linear, - min_filter: wgpu::FilterMode::Linear, - mipmap_filter: wgpu::FilterMode::Nearest, - ..Default::default() - }, - ); + wgpu::TextureViewDescriptor { + label: Some("Atlas texture view"), + format: Some(wgpu::TextureFormat::Rgba8Unorm), + dimension: Some(wgpu::TextureViewDimension::D2), + aspect: wgpu::TextureAspect::All, + base_mip_level: 0, + mip_level_count: None, + base_array_layer: 0, + array_layer_count: None, + }, + wgpu::SamplerDescriptor { + label: Some("Atlas sampler"), + address_mode_u: wgpu::AddressMode::ClampToEdge, + address_mode_v: wgpu::AddressMode::ClampToEdge, + address_mode_w: wgpu::AddressMode::ClampToEdge, + mag_filter: wgpu::FilterMode::Linear, + min_filter: wgpu::FilterMode::Linear, + mipmap_filter: wgpu::FilterMode::Nearest, + ..Default::default() + }, + ); + client + .state() + .slow_job_pool() + .spawn("TERRAIN_MESHING", move || { + // Construct the next atlas on a separate thread. If it doesn't get sent, it means + // the original channel was dropped, which implies the terrain scene data no longer + // exists, so we can just drop the result in that case. 
+ let _ = new_atlas_tx.send((atlas, texture_fn())); + }); + }); + + // Receive the most recent available atlas. This call blocks only when there was no time + // to produce a fresh atlas between calls to make_atlas, which should hopefully be rare. + let (atlas, texture) = new_atlas_rx.recv()?; + // Needs to be fully initialized for partial writes to work on Dx12 AMD. + // + // Ideally, we would either not have to do this, or have an explicit clear available, but + // neither exists, and uploading a zero texture can be slow. Fortunately, we almost always + // have an existing texture to use in this case, so we can replace the explicit clear with + // a copy from the previous atlas, skipping the CPU->GPU upload. + if let Some(old_texture) = old_texture { + // TODO: Delay submission, don't just submit immediately out of convenience! + renderer.replace_texture(&texture, old_texture); + } else { + renderer.clear_texture(&texture); + } let col_light = renderer.terrain_bind_col_light(texture); Ok((atlas, col_light)) } @@ -1099,9 +1173,11 @@ impl Terrain { // The region to actually mesh let min_z = volume .iter() + .filter(|(pos, _)| pos.x <= todo.pos.x && pos.y <= todo.pos.y) .fold(i32::MAX, |min, (_, chunk)| chunk.get_min_z().min(min)); let max_z = volume .iter() + .filter(|(pos, _)| pos.x <= todo.pos.x && pos.y <= todo.pos.y) .fold(i32::MIN, |max, (_, chunk)| chunk.get_max_z().max(max)); let aabb = Aabb { @@ -1190,10 +1266,20 @@ impl Terrain { let alloc_size = guillotiere::Size::new(i32::from(tex_size.x), i32::from(tex_size.y)); + let new_atlas_tx = &mut self.new_atlas_tx; + let new_atlas_rx = &mut self.new_atlas_rx; let allocation = atlas.allocate(alloc_size).unwrap_or_else(|| { // Atlas allocation failure: try allocating a new texture and atlas. 
let (new_atlas, new_col_lights) = - Self::make_atlas(renderer).expect("Failed to create atlas texture"); + Self::make_atlas( + scene_data.client, + renderer, + new_atlas_tx, + new_atlas_rx, + Some(&col_lights.texture), + 0 + ) + .expect("Failed to create atlas texture"); // We reset the atlas and clear allocations from existing chunks, // even though we haven't yet @@ -1252,6 +1338,7 @@ impl Terrain { can_shadow_sun: false, blocks_of_interest: response.blocks_of_interest, z_bounds: mesh.z_bounds, + shadow_z_bounds: mesh.shadow_z_bounds, frustum_last_plane_index: 0, }); } else if let Some(chunk) = self.chunks.get_mut(&response.pos) { @@ -1386,11 +1473,11 @@ impl Terrain { // Ensure the chunk is within the PSR set. let chunk_box = math::Aabb { - min: math::Vec3::new(chunk_pos.x, chunk_pos.y, chunk.z_bounds.0), + min: math::Vec3::new(chunk_pos.x, chunk_pos.y, chunk.shadow_z_bounds.0), max: math::Vec3::new( chunk_pos.x + chunk_sz, chunk_pos.y + chunk_sz, - chunk.z_bounds.1, + chunk.shadow_z_bounds.1, ), };