WIP: Glow lighting performance improvements.

Messes up water somewhat and is incomplete for glow lighting, but should
be a fair amount faster in general.  Also reduces stuttering during
chunk generation by moving atlas creation and "clearing" off the main
thread (in fact, we don't even clear it except for the very first
allocated terrain atlas).
This commit is contained in:
Joshua Yanovski 2022-08-02 18:01:17 -07:00
parent f174472f55
commit 04bb1e32d9
7 changed files with 559 additions and 216 deletions

View File

@ -454,7 +454,7 @@ impl SpriteKind {
}
#[inline]
pub fn get_glow(&self) -> Option<u8> {
pub const fn get_glow(&self) -> Option<u8> {
match self {
SpriteKind::StreetLamp | SpriteKind::StreetLampTall => Some(24),
SpriteKind::Ember => Some(20),

View File

@ -10,6 +10,7 @@
once_cell,
stmt_expr_attributes,
trait_alias,
type_alias_impl_trait,
option_get_or_insert_default,
map_try_insert,
slice_as_chunks,

View File

@ -33,39 +33,83 @@ pub const SUNLIGHT: u8 = 24;
pub const SUNLIGHT_INV: f32 = 1.0 / SUNLIGHT as f32;
pub const MAX_LIGHT_DIST: i32 = SUNLIGHT as i32;
fn calc_light<V: RectRasterableVol<Vox = Block> + ReadVol + Debug>(
/// Working around lack of existential types.
///
/// Names the opaque closure type returned by `calc_light`: a `Send + Sync + 'static` function
/// that takes a `Vec3<i32>` position and returns an `f32` light level.
///
/// See [https://github.com/rust-lang/rust/issues/42940]
type CalcLightFn<V, I> = impl Fn(Vec3<i32>) -> f32 + 'static + Send + Sync;
/// Builds a lookup closure over the flattened block array `flat`.
///
/// `flat` is indexed as `x * (h * d) + y * d + z`, where `w`, `h` and `d` are the extents of
/// the array along x, y and z. The returned closure shifts the incoming `z` up by one before
/// indexing, so a position with `z == -1` reads the first z-slot of its column.
///
/// # Panics
///
/// Panics if `flat` holds fewer than `w * h * d` blocks; the returned closure panics if the
/// index it computes falls outside those first `w * h * d` blocks.
#[inline(always)]
fn flat_get<'a>(flat: &'a [Block], w: i32, h: i32, d: i32) -> impl Fn(Vec3<i32>) -> Block + 'a {
    let hd = h * d;
    // Restrict the view to the first `w * hd` blocks once, up front.
    let flat = &flat[0..(w * hd) as usize];
    #[inline(always)] move |Vec3 { x, y, z }| {
        // z can range from -1..range.size().d + 1
        let z = z + 1;
        flat[(x * hd + y * d + z) as usize]
    }
}
fn calc_light<'a,
V: RectRasterableVol<Vox = Block> + ReadVol + Debug + 'static,
I: Iterator<Item=(Vec3<i32>, u8)>,
/* F: /*for<'x> */for<'a> fn(&'a Vec<Block>) -> G, */
/* G: /*[&'x &'a (); 0], */Fn(Vec3<i32>) -> Block, */
>(
is_sunlight: bool,
// When above bounds
default_light: u8,
bounds: Aabb<i32>,
vol: &VolGrid2d<V>,
lit_blocks: impl Iterator<Item = (Vec3<i32>, u8)>,
) -> impl Fn(Vec3<i32>) -> f32 + 'static + Send + Sync {
/* span!(_guard, "calc_light");
range: Aabb<i32>,
vol: &'a VolGrid2d<V>,
lit_blocks: I,
flat: &'a Vec<Block>,
(w, h, d): (i32, i32, i32)
) -> CalcLightFn<V, I> {
span!(_guard, "calc_light");
const UNKNOWN: u8 = 255;
const OPAQUE: u8 = 254;
let outer = Aabb {
min: bounds.min - Vec3::new(SUNLIGHT as i32, SUNLIGHT as i32, 1),
max: bounds.max + Vec3::new(SUNLIGHT as i32, SUNLIGHT as i32, 1),
min: bounds.min/* - Vec3::new(SUNLIGHT as i32, SUNLIGHT as i32, 1) */ - Vec3::new(0, 0, 1),
max: bounds.max/* + Vec3::new(SUNLIGHT as i32, SUNLIGHT as i32, 1) */ + Vec3::new(0, 0, 1),
};
let mut vol_cached = vol.cached();
let range_delta = outer.min - range.min;
let mut light_map = vec![UNKNOWN; outer.size().product() as usize];
/* let mut vol_cached = vol.cached(); */
let mut light_map_ = vec![UNKNOWN; outer.size().product() as usize];
let (w_, h_, d_) = outer.clone().size().into_tuple();
let wh_ = w_ * h_;
let light_map = &mut light_map_[0..(wh_ * d_) as usize];
let lm_idx = {
let (w, h, _) = outer.clone().size().into_tuple();
move |x, y, z| (w * h * z + h * x + y) as usize
#[inline(always)] move |x, y, z| {
(wh_ * z + h_ * x + y) as usize
}
};
// Light propagation queue
/* // Light propagation queue
let mut prop_que = lit_blocks
.map(|(pos, light)| {
let rpos = pos - outer.min;
light_map[lm_idx(rpos.x, rpos.y, rpos.z)] = light.min(SUNLIGHT); // Brightest light
(rpos.x as u8, rpos.y as u8, rpos.z as u16)
})
.collect::<VecDeque<_>>();
// Start sun rays
.collect::<VecDeque<_>>(); */
let mut prop_que = vec![Vec::new(); usize::from(SUNLIGHT) + 1];
let mut prop_queue = &mut prop_que[..usize::from(SUNLIGHT) + 1];
lit_blocks.for_each(|(pos, light)| {
/* println!("Lighting {:?}: {:?}", pos, light); */
let rpos = pos - outer.min;
let glow = light.min(SUNLIGHT);
light_map[lm_idx(rpos.x, rpos.y, rpos.z)] = glow; // Brightest light
prop_que[usize::from(glow)].push((rpos.x as u8, rpos.y as u8, rpos.z as u16));
});
/* // Start sun rays
if is_sunlight {
for x in 0..outer.size().w {
for y in 0..outer.size().h {
@ -89,110 +133,150 @@ fn calc_light<V: RectRasterableVol<Vox = Block> + ReadVol + Debug>(
}
}
}
}
} */
// Determines light propagation
let propagate = |src: u8,
let flat_get = flat_get(flat, w, h, d);
let propagate = #[inline(always)] |src: u8,
dest: &mut u8,
pos: Vec3<i32>,
prop_que: &mut VecDeque<_>,
vol: &mut CachedVolGrid2d<V>| {
if *dest != OPAQUE {
if *dest == UNKNOWN {
if vol
prop_que: &mut /*VecDeque*/Vec<_>,
/* vol: &mut CachedVolGrid2d<V> */| {
let dst = *dest;
/* if dst != OPAQUE */{
if dst < src || dst == UNKNOWN /* {
if */&& /* vol
.get(outer.min + pos)
.ok()
.map_or(false, |b| b.is_fluid())
.map_or(false, |b| b.is_fluid()) */
flat_get(/*[], */pos + range_delta).is_fluid()
{
*dest = src.saturating_sub(1);
/* *dest = src.saturating_sub(1); */
*dest = src;
// Can't propagate further
if *dest > 1 {
prop_que.push_back((pos.x as u8, pos.y as u8, pos.z as u16));
if /* *dest */src > 1 {
prop_que./*push_back*/push((pos.x as u8, pos.y as u8, pos.z as u16));
}
} else {
}/* else {
*dest = OPAQUE;
}
} else if *dest < src.saturating_sub(1) {
*dest = src - 1;
} */
/*} else if *dest < src/* .saturating_sub(1) */ {
*dest = src/* - 1 */;
// Can't propagate further
if *dest > 1 {
prop_que.push_back((pos.x as u8, pos.y as u8, pos.z as u16));
if /* *dest */src > 1 {
prop_que./*push_back*/push((pos.x as u8, pos.y as u8, pos.z as u16));
}
}
} */
}
};
// Propagate light
while let Some(pos) = prop_que.pop_front() {
let pos = Vec3::new(pos.0 as i32, pos.1 as i32, pos.2 as i32);
let light = light_map[lm_idx(pos.x, pos.y, pos.z)];
//
// NOTE: We start at 2 because starting at 1 would propagate light of brightness 0 to
// neighbors.
(2..usize::from(SUNLIGHT) + 1).rev().for_each(|light| {
let (front, back) = prop_que.split_at_mut(light);
let prop_que = front.last_mut().expect("Split at least at index 2, so front must have at least 1 element.");
let front = back.first_mut().expect("Split at most at SUNLIGHT, and array has length SUNLIGHT+1, so back must have at least 1 element.");
let light = light as u8;
// NOTE: Always in bounds and ≥ 1, since light ≥ 2.
let new_light = light - 1;
/* println!("Light: {:?}", light); */
/* while let Some(pos) = prop_que.pop_front() */front.iter().for_each(|pos| {
let pos = Vec3::new(pos.0 as i32, pos.1 as i32, pos.2 as i32);
let light_ = light_map[lm_idx(pos.x, pos.y, pos.z)];
if light != light_ {
// This block got modified before it could emit anything.
return;
}
/* println!("Pos: {:?}", pos); */
// Up
// Bounds checking
if pos.z + 1 < outer.size().d {
propagate(
light,
light_map.get_mut(lm_idx(pos.x, pos.y, pos.z + 1)).unwrap(),
Vec3::new(pos.x, pos.y, pos.z + 1),
&mut prop_que,
&mut vol_cached,
)
}
// Down
if pos.z > 0 {
propagate(
light,
light_map.get_mut(lm_idx(pos.x, pos.y, pos.z - 1)).unwrap(),
Vec3::new(pos.x, pos.y, pos.z - 1),
&mut prop_que,
&mut vol_cached,
)
}
// The XY directions
if pos.y + 1 < outer.size().h {
propagate(
light,
light_map.get_mut(lm_idx(pos.x, pos.y + 1, pos.z)).unwrap(),
Vec3::new(pos.x, pos.y + 1, pos.z),
&mut prop_que,
&mut vol_cached,
)
}
if pos.y > 0 {
propagate(
light,
light_map.get_mut(lm_idx(pos.x, pos.y - 1, pos.z)).unwrap(),
Vec3::new(pos.x, pos.y - 1, pos.z),
&mut prop_que,
&mut vol_cached,
)
}
if pos.x + 1 < outer.size().w {
propagate(
light,
light_map.get_mut(lm_idx(pos.x + 1, pos.y, pos.z)).unwrap(),
Vec3::new(pos.x + 1, pos.y, pos.z),
&mut prop_que,
&mut vol_cached,
)
}
if pos.x > 0 {
propagate(
light,
light_map.get_mut(lm_idx(pos.x - 1, pos.y, pos.z)).unwrap(),
Vec3::new(pos.x - 1, pos.y, pos.z),
&mut prop_que,
&mut vol_cached,
)
}
}
// Up
// Bounds checking
// NOTE: Array accesses are all safe even if they are technically out of bounds,
// because we have margin on all sides and the light sources only come from within the
// proper confines of the volume. This allows us to fetch them before the if
// statements.
{
/* let light_map = light_map.as_mut_ptr();
let z_up = &mut *light_map.offset(lm_idx(pos.x, pos.y, pos.z + 1) as isize);
let z_down = &mut *light_map.offset(lm_idx(pos.x, pos.y, pos.z - 1) as isize);
let y_up = &mut *light_map.offset(lm_idx(pos.x, pos.y + 1, pos.z) as isize);
let y_down = &mut *light_map.offset(lm_idx(pos.x, pos.y - 1, pos.z) as isize);
let x_up = &mut *light_map.offset(lm_idx(pos.x + 1, pos.y, pos.z) as isize);
let x_down = &mut *light_map.offset(lm_idx(pos.x - 1, pos.y, pos.z) as isize); */
if pos.z + 1 < outer.size().d {
let z_up = &mut light_map[lm_idx(pos.x, pos.y, pos.z + 1)];
propagate(
new_light,
z_up,
Vec3::new(pos.x, pos.y, pos.z + 1),
/*&mut */prop_que,
/* &mut vol_cached, */
)
}
// Down
if pos.z > 0 {
let z_down = &mut light_map[lm_idx(pos.x, pos.y, pos.z - 1)];
propagate(
new_light,
z_down,
Vec3::new(pos.x, pos.y, pos.z - 1),
/*&mut */prop_que,
/* &mut vol_cached, */
)
}
// The XY directions
if pos.y + 1 < outer.size().h {
let y_up = &mut light_map[lm_idx(pos.x, pos.y + 1, pos.z)];
propagate(
new_light,
y_up,
Vec3::new(pos.x, pos.y + 1, pos.z),
/*&mut */prop_que,
/* &mut vol_cached, */
)
}
if pos.y > 0 {
let y_down = &mut light_map[lm_idx(pos.x, pos.y - 1, pos.z)];
propagate(
new_light,
y_down,
Vec3::new(pos.x, pos.y - 1, pos.z),
/*&mut */prop_que,
/* &mut vol_cached, */
)
}
if pos.x + 1 < outer.size().w {
let x_up = &mut light_map[lm_idx(pos.x + 1, pos.y, pos.z)];
propagate(
new_light,
x_up,
Vec3::new(pos.x + 1, pos.y, pos.z),
/*&mut */prop_que,
/* &mut vol_cached, */
)
}
if pos.x > 0 {
let x_down = &mut light_map[lm_idx(pos.x - 1, pos.y, pos.z)];
propagate(
new_light,
x_down,
Vec3::new(pos.x - 1, pos.y, pos.z),
/*&mut */prop_que,
/* &mut vol_cached, */
)
}
}
});
});
let min_bounds = Aabb {
min: bounds.min - 1,
max: bounds.max + 1,
min: bounds.min - Vec3::unit_z(),
max: bounds.max + Vec3::unit_z(),
};
// Minimise light map to reduce duplication. We can now discard light info
/* // Minimise light map to reduce duplication. We can now discard light info
// for blocks outside of the chunk borders.
let mut light_map2 = vec![UNKNOWN; min_bounds.size().product() as usize];
let lm_idx2 = {
@ -208,13 +292,14 @@ fn calc_light<V: RectRasterableVol<Vox = Block> + ReadVol + Debug>(
}
}
drop(light_map); */
drop(light_map_); */
let light_map2 = light_map_;
move |wpos| {
if is_sunlight { 1.0 } else { 0.0 }
/* let pos = wpos - min_bounds.min;
#[inline(always)] move |wpos| {
if is_sunlight { return 1.0 }/* else { 0.0 } */
let pos = wpos - min_bounds.min;
let l = light_map2
.get(lm_idx2(pos.x, pos.y, pos.z))
.get(/*lm_idx2*/lm_idx(pos.x, pos.y, pos.z))
.copied()
.unwrap_or(default_light);
@ -222,7 +307,7 @@ fn calc_light<V: RectRasterableVol<Vox = Block> + ReadVol + Debug>(
l as f32 * SUNLIGHT_INV
} else {
0.0
} */
}
}
}
@ -230,7 +315,7 @@ fn calc_light<V: RectRasterableVol<Vox = Block> + ReadVol + Debug>(
#[inline(always)]
pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + 'static>(
vol: &'a VolGrid2d<V>,
(range, max_texture_size, _boi): (Aabb<i32>, Vec2<u16>, &'a BlocksOfInterest),
(range, max_texture_size, boi): (Aabb<i32>, Vec2<u16>, &'a BlocksOfInterest),
) -> MeshGen<
TerrainVertex,
FluidVertex,
@ -248,51 +333,94 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
"<&VolGrid2d as Meshable<_, _>>::generate_mesh"
);
// Find blocks that should glow
// TODO: Search neighbouring chunks too!
// let glow_blocks = boi.lights
// .iter()
// .map(|(pos, glow)| (*pos + range.min.xy(), *glow));
/* DefaultVolIterator::new(vol, range.min - MAX_LIGHT_DIST, range.max + MAX_LIGHT_DIST)
.filter_map(|(pos, block)| block.get_glow().map(|glow| (pos, glow))); */
let mut glow_blocks = Vec::new();
/* // TODO: This expensive, use BlocksOfInterest instead
let mut volume = vol.cached();
for x in -MAX_LIGHT_DIST..range.size().w + MAX_LIGHT_DIST {
for y in -MAX_LIGHT_DIST..range.size().h + MAX_LIGHT_DIST {
for z in -1..range.size().d + 1 {
let wpos = range.min + Vec3::new(x, y, z);
volume
.get(wpos)
.ok()
.and_then(|b| b.get_glow())
.map(|glow| glow_blocks.push((wpos, glow)));
}
}
} */
// Calculate chunk lighting (sunlight defaults to 1.0, glow to 0.0)
let light = calc_light(true, SUNLIGHT, range, vol, core::iter::empty());
let glow = calc_light(false, 0, range, vol, glow_blocks.into_iter());
let mut opaque_limits = None::<Limits>;
let mut fluid_limits = None::<Limits>;
let mut air_limits = None::<Limits>;
let mut flat;
let flat_get = {
let (w, h, d) = range.size().into_tuple();
// z can range from -1..range.size().d + 1
let d = d + 2;
{
span!(_guard, "copy to flat array");
let (w, h, d) = range.size().into_tuple();
// z can range from -1..range.size().d + 1
let d = d + 2;
let hd = h * d;
/*let flat = */{
let mut volume = vol.cached();
/* let mut volume = vol.cached(); */
const AIR: Block = Block::air(common::terrain::sprite::SpriteKind::Empty);
// TODO: Once we can manage it sensibly, consider using something like
// Option<Block> instead of just assuming air.
/*let mut */flat = vec![AIR; (w * h * d) as usize];
let mut i = 0;
/*let mut */flat = vec![AIR; (w * /*h * d*/hd) as usize]
/* Vec::with_capacity((w * /*h * d*/hd) as usize) */
;
let flat = &mut flat/*.spare_capacity_mut()*/[0..(w * hd) as usize];
/* /*volume*/vol.iter().for_each(|(chunk_key, chunk)| {
let corner = chunk.key_pos(chunk_key);
}); */
let flat_range = Aabb {
min: range.min - Vec3::new(0, 0, 1),
max: range.max - Vec3::new(1, 1, 0),
};
let min_chunk_key = vol.pos_key(flat_range.min);
let max_chunk_key = vol.pos_key(flat_range.max);
(min_chunk_key.x..max_chunk_key.x + 1).for_each(|key_x| {
(min_chunk_key.y..max_chunk_key.y + 1).for_each(|key_y| {
let key = Vec2::new(key_x, key_y);
let chonk = vol.get_key(key).expect("All keys in range must have chonks.");
/* vol.iter().for_each(|(key, chonk)| { */
let chonk = &*chonk;
let pos = vol.key_pos(key);
// Calculate intersection of Aabb and this chunk
// TODO: should we do this more implicitly as part of the loop
// TODO: this probably has to be computed in the chunk.for_each_in() as well
// maybe remove here?
let intersection_ = flat_range.intersection(Aabb {
min: pos.with_z(i32::MIN),
// -1 here since the Aabb is inclusive and chunk_offs below will wrap it if
// it's outside the range of the chunk
max: (pos + VolGrid2d::<V>::chunk_size().map(|e| e as i32) - 1).with_z(i32::MAX),
});
// Map intersection into chunk coordinates
let x_diff = intersection_.min.x - flat_range.min.x;
let y_diff = intersection_.min.y - flat_range.min.y;
let z_diff = -range.min.z;
let y_rem = flat_range.max.y - intersection_.max.y;
let x_off = ((y_diff + y_rem) * d) as usize;
let intersection = Aabb {
min: VolGrid2d::<V>::chunk_offs(intersection_.min) + Vec3::new(0, 0, z_diff),
max: VolGrid2d::<V>::chunk_offs(intersection_.max) + Vec3::new(1, 1, z_diff + 1),
};
/* [[0 ..1]; [0 ..1]; [0..d]]
[[0 ..1]; [1 ..h-1]; [0..d]]
[[0 ..1]; [h-1..h]; [0..d]]
// How to handle middle?
// Answer:
[[w-1..w]; [0 ..1]; [0..d]]
[[w-1..w]; [1 ..h-1]; [0..d]]
[[w-1..w]; [h-1..h]; [0..d]]
[1,1; d] */
let mut i = (x_diff * hd + y_diff * d) as usize;
let hd_ = (intersection.size().h * d) as usize;
// dbg!(pos, intersection_, intersection, range, flat_range, x_diff, y_diff, z_diff, y_rem, x_off, i);
(intersection.min.x..intersection.max.x).for_each(|x| {
let flat = &mut flat[i..i + /*intersection.size().y * intersection.size().z*/hd_];
flat.chunks_exact_mut(d as usize).enumerate().for_each(|(y, flat)| {
let y = y as i32 + intersection.min.y;
/* (intersection.min.y..intersection.max.y).for_each(|y| { */
/* let mut i = (x * hd + y * d) as usize; */
/* chonk.for_each_in(intersection, |pos_offset, block| {
pos_offset.z += z_diff;
}); */
flat.into_iter().enumerate().for_each(|(z, flat)| {
let z = z as i32 + intersection.min.z;
/* (intersection.min.z..intersection.max.z).for_each(|z| { */
/* let mut i = ((x_diff + (x - intersection.min.x)) * hd + (y_diff + (y - intersection.min.y)) * d + (z - intersection.min.z)) as usize; */
/* vol.iter(flat_range, |wpos, block| {
let z = wpos.z - range.min.z; */
/* let mut i = 0;
for x in 0..range.size().w {
for y in 0..range.size().h {
for z in -1..range.size().d + 1 {
@ -302,7 +430,10 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
.map(|b| *b)
// TODO: Replace with None or some other more reasonable value,
// since it's not clear this will work properly with liquid.
.unwrap_or(AIR);
.unwrap_or(AIR); */
/* if let Ok(&block) = chonk.get(Vec3::new(x, y, z - z_diff)) */
let block = chonk.get(Vec3::new(x, y, z - z_diff)).copied().unwrap_or(AIR);
{
if block.is_opaque() {
opaque_limits = opaque_limits
.map(|l| l.including(z))
@ -317,30 +448,47 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
.map(|l| l.including(z))
.or_else(|| Some(Limits::from_value(z)));
};
flat[i] = block;
i += 1;
/*flat[i] = block*//*unsafe { flat.get_unchecked_mut(i) }*//*flat[i].write(block);*/
/* flat.write(block); */
*flat = block;
}
/* i += 1; */
/* }
}
}
/* flat */
};
let hd = h * d;
let flat = &flat[0..(w * hd) as usize];
#[inline(always)] move |Vec3 { x, y, z }| {
// z can range from -1..range.size().d + 1
let z = z + 1;
flat[(x * hd + y * d + z) as usize]
/* match flat.get((x * hd + y * d + z) as usize).copied() {
Some(b) => b,
None => panic!("x {} y {} z {} d {} h {}", x, y, z, d, h),
} */
/* flat */
/* }); */
/* }); */
});
// i += d;
/* }); */
});
// i += x_off;
i += hd as usize;
});
/* }); */
});
});
}
};
/* unsafe { flat.set_len((w * hd) as usize); } */
}
// Constrain iterated area
let mut opaque_mesh = Mesh::new();
let mut fluid_mesh = Mesh::new();
let (z_start, z_end) = match (air_limits, fluid_limits, opaque_limits) {
(Some(air), Some(fluid), Some(opaque)) => air.three_way_intersection(fluid, opaque),
(Some(air), Some(fluid), Some(opaque)) => {
let air_fluid = air.intersection(fluid);
if let Some(intersection) = air_fluid.filter(|limits| limits.min + 1 == limits.max) {
// If there is a planar air-fluid boundary, just draw it directly and avoid
// redundantly meshing the whole fluid volume, then intersect the ground-fluid
// and ground-air meshes to make sure we don't miss anything.
either_with(air.intersection(opaque), fluid.intersection(opaque), Limits::union)
} else {
// Otherwise, do a normal three-way intersection.
air.three_way_intersection(fluid, opaque)
}
},
(Some(air), Some(fluid), None) => air.intersection(fluid),
(Some(air), None, Some(opaque)) => air.intersection(opaque),
(None, Some(fluid), Some(opaque)) => fluid.intersection(opaque),
@ -358,6 +506,62 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
(start, end)
});
// Calculate chunk lighting (sunlight defaults to 1.0, glow to 0.0)
let mut glow_range = range;
if let Some(opaque) = opaque_limits {
glow_range.min.z = z_start.max(opaque.min) + range.min.z;
glow_range.max.z = (z_end.min(opaque.max) + range.min.z).max(glow_range.min.z);
}
// Find blocks that should glow
// TODO: Search neighbouring chunks too!
let mut glow_block_min = glow_range.max.z;
let mut glow_block_max = glow_range.min.z;
let mut glow_blocks = boi.lights
.iter()
.map(|(pos, glow)| {
let pos_z = pos.z.clamped(glow_range.min.z, glow_range.max.z);
let glow = (i32::from(*glow) - (pos_z - pos.z).abs()).max(0);
glow_block_min = glow_block_min.min(pos_z - glow);
glow_block_max = glow_block_max.max(pos_z + glow);
((*pos + glow_range.min.xy()).with_z(pos_z), glow as u8)
})
// FIXME: Why is Rust forcing me to collect to Vec here?
.collect::<Vec<_>>();
glow_range.min.z = glow_block_min.clamped(glow_range.min.z, glow_range.max.z);
glow_range.max.z = glow_block_max.clamped(glow_range.min.z, glow_range.max.z);
/* if glow_range.min.z != glow_range.max.z {
println!("{:?}", glow_range);
} */
let mut light_range = glow_range;
light_range.min.z = light_range.max.z;
/* // Sort glowing blocks in decreasing order by glow strength. This makes it somewhat less
// likely that a smaller glow will have to be drawn.
glow_blocks.sort_unstable_by_key(|(_, glow)| core::cmp::Reverse(*glow)); */
/* DefaultVolIterator::new(vol, range.min - MAX_LIGHT_DIST, range.max + MAX_LIGHT_DIST)
.filter_map(|(pos, block)| block.get_glow().map(|glow| (pos, glow))); */
/* let mut glow_blocks = Vec::new(); */
/* // TODO: This expensive, use BlocksOfInterest instead
let mut volume = vol.cached();
for x in -MAX_LIGHT_DIST..range.size().w + MAX_LIGHT_DIST {
for y in -MAX_LIGHT_DIST..range.size().h + MAX_LIGHT_DIST {
for z in -1..range.size().d + 1 {
let wpos = range.min + Vec3::new(x, y, z);
volume
.get(wpos)
.ok()
.and_then(|b| b.get_glow())
.map(|glow| glow_blocks.push((wpos, glow)));
}
}
} */
let light = calc_light(true, SUNLIGHT, light_range, range, vol, core::iter::empty(), &flat, (w, h, d));
let glow = calc_light(false, 0, glow_range, range, vol, glow_blocks.into_iter(), &flat, (w, h, d));
let max_size = max_texture_size;
assert!(z_end >= z_start);
let greedy_size = Vec3::new(range.size().w - 2, range.size().h - 2, z_end - z_start + 1);
@ -375,6 +579,7 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
let greedy_size_cross = Vec3::new(greedy_size.x - 1, greedy_size.y - 1, greedy_size.z);
let draw_delta = Vec3::new(1, 1, z_start);
let flat_get = flat_get(&flat, w, h, d);
let get_light = #[inline(always)] |_: &mut (), pos: Vec3<i32>| {
if flat_get(pos).is_opaque() {
0.0
@ -400,8 +605,6 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
let mut greedy =
GreedyMesh::<guillotiere::SimpleAtlasAllocator>::new(max_size, greedy::general_config());
let mut opaque_mesh = Mesh::new();
let mut fluid_mesh = Mesh::new();
greedy.push(GreedyConfig {
data: (),
draw_delta,

View File

@ -1378,15 +1378,20 @@ impl Renderer {
}
/// Create a new raw texture.
pub fn create_texture_raw(
///
/// NOTE: This is done lazily--the returned function must be invoked to actually create the
/// texture. This allows creating the texture on another thread.
pub fn create_texture_raw<'a>(
&mut self,
texture_info: &wgpu::TextureDescriptor,
view_info: &wgpu::TextureViewDescriptor,
sampler_info: &wgpu::SamplerDescriptor,
) -> Texture {
let texture = Texture::new_raw(&self.device, texture_info, view_info, sampler_info);
texture.clear(&self.queue); // Needs to be fully initialized for partial writes to work on Dx12 AMD
texture
texture_info: wgpu::TextureDescriptor<'a>,
view_info: wgpu::TextureViewDescriptor<'a>,
sampler_info: wgpu::SamplerDescriptor<'a>,
) -> impl FnOnce() -> Texture + Send + Sync + 'a {
let device = Arc::clone(&self.device);
move || {
let texture = Texture::new_raw(&device, &texture_info, &view_info, &sampler_info);
texture
}
}
/// Create a new texture from the provided image.
@ -1429,6 +1434,21 @@ impl Renderer {
texture.update(&self.queue, offset, size, bytemuck::cast_slice(data))
}
/// Clears the texture data to 0.
///
/// Thin wrapper that forwards to `Texture::clear` with this renderer's queue.
pub fn clear_texture(
&mut self,
texture: &Texture, /* <T> */
) {
texture.clear(&self.queue)
}
/// Replaces the destination texture with the contents of the source texture.
///
/// The source size should at least fit within the destination texture's size.
///
/// Thin wrapper that forwards to `Texture::replace` on `dest`, passing this renderer's device
/// and queue along with `source`.
pub fn replace_texture(&mut self, dest: &Texture, source: &Texture) {
dest.replace(&self.device, &self.queue, source);
}
/// Queue to obtain a screenshot on the next frame render
pub fn create_screenshot(
&mut self,

View File

@ -190,6 +190,38 @@ impl Texture {
self.update(queue, [0, 0], [size.width, size.height], &zeros);
}
/// Overwrites this texture with the contents of `texture`.
///
/// The copy spans the full width and height of `texture` (one layer, mip level 0, starting at
/// the origin of both textures), so the source size should at least fit within this texture's
/// size.
pub fn replace(&self, device: &wgpu::Device, queue: &wgpu::Queue, texture: &Self) {
    let encoder_desc = wgpu::CommandEncoderDescriptor {
        label: Some("Replace the texture buffer"),
    };
    let mut encoder = device.create_command_encoder(&encoder_desc);

    // Describe both ends of the copy, then record it.
    let copy_from = wgpu::ImageCopyTexture {
        texture: &texture.tex,
        mip_level: 0,
        origin: wgpu::Origin3d::ZERO,
    };
    let copy_to = wgpu::ImageCopyTexture {
        texture: &self.tex,
        mip_level: 0,
        origin: wgpu::Origin3d::ZERO,
    };
    let copy_extent = wgpu::Extent3d {
        width: texture.size.width,
        height: texture.size.height,
        depth_or_array_layers: 1,
    };
    encoder.copy_texture_to_texture(copy_from, copy_to, copy_extent);

    // TODO: Delay submission, don't just submit immediately out of convenience!
    let commands = encoder.finish();
    queue.submit(std::iter::once(commands));
}
/// Update a texture with the given data (used for updating the glyph cache
/// texture).
pub fn update(&self, queue: &wgpu::Queue, offset: [u32; 2], size: [u32; 2], data: &[u8]) {

View File

@ -295,7 +295,7 @@ impl Scene {
let globals_bind_group = renderer.bind_globals(&data, lod.get_data());
let terrain = Terrain::new(renderer, &data, lod.get_data(), sprite_render_context);
let terrain = Terrain::new(client, renderer, &data, lod.get_data(), sprite_render_context);
Self {
data,

View File

@ -12,7 +12,7 @@ use crate::{
pipelines::{self, ColLights},
ColLightInfo, FirstPassDrawer, FluidVertex, GlobalModel, Instances, LodData, Mesh, Model,
RenderError, Renderer, SpriteGlobalsBindGroup, SpriteInstance, SpriteVertex, SpriteVerts,
TerrainLocals, TerrainShadowDrawer, TerrainVertex, SPRITE_VERT_PAGE_SIZE,
TerrainLocals, TerrainShadowDrawer, TerrainVertex, Texture, SPRITE_VERT_PAGE_SIZE,
},
};
@ -20,6 +20,7 @@ use super::{
camera::{self, Camera},
math, SceneData, RAIN_THRESHOLD,
};
use client::Client;
use common::{
assets::{self, AssetExt, DotVoxAsset},
figure::Segment,
@ -97,6 +98,7 @@ pub struct TerrainChunkData {
can_shadow_point: bool,
can_shadow_sun: bool,
z_bounds: (f32, f32),
shadow_z_bounds: (f32, f32),
frustum_last_plane_index: u8,
}
@ -112,6 +114,7 @@ struct ChunkMeshState {
/// Just the mesh part of a mesh worker response.
pub struct MeshWorkerResponseMesh {
z_bounds: (f32, f32),
shadow_z_bounds: (f32, f32),
opaque_mesh: Mesh<TerrainVertex>,
fluid_mesh: Mesh<FluidVertex>,
col_lights_info: ColLightInfo,
@ -263,6 +266,11 @@ fn mesh_worker<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug + '
mesh = Some(MeshWorkerResponseMesh {
// TODO: Take sprite bounds into account somehow?
z_bounds: (bounds.min.z, bounds.max.z),
// NOTE: It should be pretty much impossible to see *just* the edge of an object in
// another chunk without seeing any of the actual voxel associated with that object, so
// we can ignore such cases for the purposes of determining a shadow bounding box (but
// not visibility, unfortunately).
shadow_z_bounds: ((chunk.get_min_z() as f32).max(bounds.min.z), (chunk.get_max_z() as f32).min(bounds.max.z)),
opaque_mesh,
fluid_mesh,
col_lights_info,
@ -406,6 +414,8 @@ pub struct Terrain<V: RectRasterableVol = TerrainChunk> {
// workers.
mesh_send_tmp: channel::Sender<MeshWorkerResponse>,
mesh_recv: channel::Receiver<MeshWorkerResponse>,
new_atlas_tx: channel::Sender<(AtlasAllocator, Texture)>,
new_atlas_rx: channel::Receiver<(AtlasAllocator, Texture)>,
mesh_todo: HashMap<Vec2<i32>, ChunkMeshState>,
mesh_todos_active: Arc<AtomicU64>,
mesh_recv_overflow: f32,
@ -614,6 +624,7 @@ impl SpriteRenderContext {
impl<V: RectRasterableVol> Terrain<V> {
pub fn new(
client: &Client,
renderer: &mut Renderer,
global_model: &GlobalModel,
lod_data: &LodData,
@ -623,8 +634,29 @@ impl<V: RectRasterableVol> Terrain<V> {
// with worker threads that are meshing chunks.
let (send, recv) = channel::unbounded();
// Number of background atlases to have prepared at a time. It is unlikely we would ever
// want to change this value from 1, unless rendering gets super speedy or our atlas count
// starts exploding. This should never be set to 0 unless you are okay with blocking every
// single atlas (this technically improves memory utilization, but the tradeoff can result
// in really bad stuttering; if this becomes a desirable configuration, we can rework the
// way the rest of Terrain::maintain works so that it can deal with no atlas being
// available).
const EXTRA_ATLAS_COUNT: usize = 1;
// Create a second mpsc pair for offloading atlas allocation to a second thread. This way,
// a second thread is usually ready to produce a new atlas the moment we ask for it, so we
// avoid waiting longer than necessary. The channel holds just EXTRA_ATLAS_COUNT
// atlases at a time because we always (except at the very beginning) start creating a new
// atlas at the same time that we receive a previous one, and at the beginning we create
// 1 + EXTRA_ATLAS_COUNT atlases (one of which we immediately consume), so we can
// never have more than one atlas in transit at a time.
let (mut new_atlas_tx, mut new_atlas_rx) = channel::bounded(EXTRA_ATLAS_COUNT);
// We start by creating an extra atlas, ensuring that we are always building one more atlas
// than we currently need in a background job.
let (atlas, col_lights) =
Self::make_atlas(renderer).expect("Failed to create atlas texture");
Self::make_atlas(client, renderer, &mut new_atlas_tx, &mut new_atlas_rx, None, EXTRA_ATLAS_COUNT)
.expect("Failed to create atlas texture");
Self {
atlas,
@ -633,6 +665,8 @@ impl<V: RectRasterableVol> Terrain<V> {
shadow_chunks: Vec::default(),
mesh_send_tmp: send,
mesh_recv: recv,
new_atlas_tx,
new_atlas_rx,
mesh_todo: HashMap::default(),
mesh_todos_active: Arc::new(AtomicU64::new(0)),
mesh_recv_overflow: 0.0,
@ -648,53 +682,93 @@ impl<V: RectRasterableVol> Terrain<V> {
}
}
/// `count` is the number of extra atlases to attempt to create (we always try to write one and
/// read one when count is 0, and we can create extra atlases as count moves higher).
///
/// `old_texture` is an optional argument representing an old texture with the same size and
/// (ideally) format as the new atlas.
fn make_atlas(
client: &Client,
renderer: &mut Renderer,
) -> Result<(AtlasAllocator, ColLights<pipelines::terrain::Locals>), RenderError> {
new_atlas_tx: &mut channel::Sender<(AtlasAllocator, Texture)>,
new_atlas_rx: &mut channel::Receiver<(AtlasAllocator, Texture)>,
old_texture: Option<&Texture>,
count: usize,
) -> Result<(AtlasAllocator, ColLights<pipelines::terrain::Locals>), channel::RecvError> {
span!(_guard, "make_atlas", "Terrain::make_atlas");
let max_texture_size = renderer.max_texture_size();
let atlas_size = guillotiere::Size::new(max_texture_size as i32, max_texture_size as i32);
let atlas = AtlasAllocator::with_options(atlas_size, &guillotiere::AllocatorOptions {
// TODO: Verify some good empirical constants.
small_size_threshold: 128,
large_size_threshold: 1024,
..guillotiere::AllocatorOptions::default()
});
let texture = renderer.create_texture_raw(
&wgpu::TextureDescriptor {
label: Some("Atlas texture"),
size: wgpu::Extent3d {
width: max_texture_size,
height: max_texture_size,
depth_or_array_layers: 1,
(0..=count).for_each(|_| {
let atlas = AtlasAllocator::with_options(atlas_size, &guillotiere::AllocatorOptions {
// TODO: Verify some good empirical constants.
small_size_threshold: 128,
large_size_threshold: 1024,
..guillotiere::AllocatorOptions::default()
});
let new_atlas_tx = new_atlas_tx.clone();
let texture_fn = renderer.create_texture_raw(
wgpu::TextureDescriptor {
label: Some("Atlas texture"),
size: wgpu::Extent3d {
width: max_texture_size,
height: max_texture_size,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8Unorm,
// NOTE: COPY_SRC is used for the hack we use to work around zeroing, it
// shouldn't be needed otherwise.
usage: wgpu::TextureUsage::COPY_SRC | wgpu::TextureUsage::COPY_DST | wgpu::TextureUsage::SAMPLED,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8Unorm,
usage: wgpu::TextureUsage::COPY_DST | wgpu::TextureUsage::SAMPLED,
},
&wgpu::TextureViewDescriptor {
label: Some("Atlas texture view"),
format: Some(wgpu::TextureFormat::Rgba8Unorm),
dimension: Some(wgpu::TextureViewDimension::D2),
aspect: wgpu::TextureAspect::All,
base_mip_level: 0,
mip_level_count: None,
base_array_layer: 0,
array_layer_count: None,
},
&wgpu::SamplerDescriptor {
label: Some("Atlas sampler"),
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Linear,
mipmap_filter: wgpu::FilterMode::Nearest,
..Default::default()
},
);
wgpu::TextureViewDescriptor {
label: Some("Atlas texture view"),
format: Some(wgpu::TextureFormat::Rgba8Unorm),
dimension: Some(wgpu::TextureViewDimension::D2),
aspect: wgpu::TextureAspect::All,
base_mip_level: 0,
mip_level_count: None,
base_array_layer: 0,
array_layer_count: None,
},
wgpu::SamplerDescriptor {
label: Some("Atlas sampler"),
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Linear,
mipmap_filter: wgpu::FilterMode::Nearest,
..Default::default()
},
);
client
.state()
.slow_job_pool()
.spawn("TERRAIN_MESHING", move || {
// Construct the next atlas on a separate thread. If it doesn't get sent, it means
// the original channel was dropped, which implies the terrain scene data no longer
// exists, so we can just drop the result in that case.
let _ = new_atlas_tx.send((atlas, texture_fn()));
});
});
// Receive the most recent available atlas. This call blocks only when there was no time
// to produce a fresh atlas between calls to make_atlas, which should hopefully be rare.
let (atlas, texture) = new_atlas_rx.recv()?;
// Needs to be fully initialized for partial writes to work on Dx12 AMD.
//
// Ideally, we would either not have to do this, or have an explicit clear available, but
// neither exists, and uploading a zero texture can be slow. Fortunately, we almost always
// have an existing texture to use in this case, so we can replace the explicit clear with
// a copy from the previous atlas, skipping the CPU->GPU upload.
if let Some(old_texture) = old_texture {
// TODO: Delay submission, don't just submit immediately out of convenience!
renderer.replace_texture(&texture, old_texture);
} else {
renderer.clear_texture(&texture);
}
let col_light = renderer.terrain_bind_col_light(texture);
Ok((atlas, col_light))
}
@ -1099,9 +1173,11 @@ impl<V: RectRasterableVol> Terrain<V> {
// The region to actually mesh
let min_z = volume
.iter()
.filter(|(pos, _)| pos.x <= todo.pos.x && pos.y <= todo.pos.y)
.fold(i32::MAX, |min, (_, chunk)| chunk.get_min_z().min(min));
let max_z = volume
.iter()
.filter(|(pos, _)| pos.x <= todo.pos.x && pos.y <= todo.pos.y)
.fold(i32::MIN, |max, (_, chunk)| chunk.get_max_z().max(max));
let aabb = Aabb {
@ -1190,10 +1266,20 @@ impl<V: RectRasterableVol> Terrain<V> {
let alloc_size =
guillotiere::Size::new(i32::from(tex_size.x), i32::from(tex_size.y));
let new_atlas_tx = &mut self.new_atlas_tx;
let new_atlas_rx = &mut self.new_atlas_rx;
let allocation = atlas.allocate(alloc_size).unwrap_or_else(|| {
// Atlas allocation failure: try allocating a new texture and atlas.
let (new_atlas, new_col_lights) =
Self::make_atlas(renderer).expect("Failed to create atlas texture");
Self::make_atlas(
scene_data.client,
renderer,
new_atlas_tx,
new_atlas_rx,
Some(&col_lights.texture),
0
)
.expect("Failed to create atlas texture");
// We reset the atlas and clear allocations from existing chunks,
// even though we haven't yet
@ -1252,6 +1338,7 @@ impl<V: RectRasterableVol> Terrain<V> {
can_shadow_sun: false,
blocks_of_interest: response.blocks_of_interest,
z_bounds: mesh.z_bounds,
shadow_z_bounds: mesh.shadow_z_bounds,
frustum_last_plane_index: 0,
});
} else if let Some(chunk) = self.chunks.get_mut(&response.pos) {
@ -1386,11 +1473,11 @@ impl<V: RectRasterableVol> Terrain<V> {
// Ensure the chunk is within the PSR set.
let chunk_box = math::Aabb {
min: math::Vec3::new(chunk_pos.x, chunk_pos.y, chunk.z_bounds.0),
min: math::Vec3::new(chunk_pos.x, chunk_pos.y, chunk.shadow_z_bounds.0),
max: math::Vec3::new(
chunk_pos.x + chunk_sz,
chunk_pos.y + chunk_sz,
chunk.z_bounds.1,
chunk.shadow_z_bounds.1,
),
};