From b26043b0e6b42ca23e6fbaad93f0368dfe0afa44 Mon Sep 17 00:00:00 2001
From: haslersn
Date: Fri, 6 Sep 2019 15:23:38 +0200
Subject: [PATCH] common: Rework `Chunk` and `Chonk` implementation

Previously, voxels in sparsely populated chunks were stored in a
`HashMap`. However, during usage oftentimes block accesses are followed
by subsequent nearby voxel accesses. Therefore it's possible to provide
cache friendliness, but not with `HashMap`.

The previous merge request
[!469](https://gitlab.com/veloren/veloren/merge_requests/469) proposed
to order voxels by their Morton order (see
https://en.wikipedia.org/wiki/Z-order_curve ). This provided excellent
cache friendliness. However, benchmarks showed that the required
indexing calculations are quite expensive. Particular results on my
_Intel(R) Core(TM) i7-7500U CPU @ 2.70 GHz_ were:

| Benchmark                                | Before this commit @ d322384becac | Morton Order @ ec8a7caf42ba | This commit          |
| ---------------------------------------- | --------------------------------- | --------------------------- | -------------------- |
| `full read` (81920 voxels)               | 17.7ns per voxel                  | 8.9ns per voxel             | **3.6ns** per voxel  |
| `constrained read` (4913 voxels)         | 67.0ns per voxel                  | 40.1ns per voxel            | **14.1ns** per voxel |
| `local read` (125 voxels)                | 17.5ns per voxel                  | 14.7ns per voxel            | **3.8ns** per voxel  |
| `X-direction read` (17 voxels)           | 17.8ns per voxel                  | 25.9ns per voxel            | **4.2ns** per voxel  |
| `Y-direction read` (17 voxels)           | 18.4ns per voxel                  | 33.3ns per voxel            | **4.5ns** per voxel  |
| `Z-direction read` (17 voxels)           | 18.6ns per voxel                  | 38.2ns per voxel            | **5.4ns** per voxel  |
| `long Z-direction read` (65 voxels)      | 18.0ns per voxel                  | 37.7ns per voxel            | **5.1ns** per voxel  |
| `full write (dense)` (81920 voxels)      | 17.9ns per voxel                  | **10.3ns** per voxel        | 12.4ns per voxel     |

This commit (instead of utilizing Morton order) replaces `HashMap` in
the `Chunk` implementation with the following data structure:

The volume is spatially subdivided into
groups of `4*4*4` blocks. Since a `Chunk` is of total size `32*32*16`,
this implies that there are `8*8*4` groups. (These numbers are generic
in the actual code such that there are always `256` groups. I.e. the
group size is chosen depending on the desired total size of the
`Chunk`.)

There's a single vector `self.vox` which consecutively stores these
groups. Each group might or might not be contained in `self.vox`. A
group that is not contained represents that the full group consists only
of `self.default` voxels. This saves a lot of memory because oftentimes
a `Chunk` consists of either a lot of air or a lot of stone.

To track whether a group is contained in `self.vox`, there's an index
buffer `self.indices : [u8; 256]`. It contains for each group

* (a) the order in which it has been inserted into `self.vox`, if the
  group is contained in `self.vox` or
* (b) 255, otherwise. That case represents that the whole group consists
  only of `self.default` voxels.

(Note that 255 is a valid insertion order for case (a) only if
`self.vox` is full and then no other group has the index 255. Therefore
there's no ambiguity.)

Rationale:

The index buffer should be small because:

* Small size increases the probability that it will always be in cache.
* The index buffer is allocated for every `Chunk` and an almost empty
  `Chunk` shall not consume too much memory.

The number of 256 groups is particularly nice because it means that the
index buffer can consist of `u8`s. This keeps the space requirement for
the index buffer as low as 4 cache lines.
--- client/src/lib.rs | 6 +- common/src/terrain/chonk.rs | 372 +++++++++++++++++------------------ common/src/terrain/mod.rs | 2 +- common/src/volumes/chunk.rs | 378 +++++++++++++++++++++++++++++------- 4 files changed, 500 insertions(+), 258 deletions(-) diff --git a/client/src/lib.rs b/client/src/lib.rs index 4b7550585c..fb1aeb03ed 100644 --- a/client/src/lib.rs +++ b/client/src/lib.rs @@ -12,12 +12,12 @@ use common::{ msg::{ClientMsg, ClientState, RequestStateError, ServerError, ServerInfo, ServerMsg}, net::PostBox, state::{State, Uid}, - terrain::{block::Block, chonk::ChonkMetrics, TerrainChunk, TerrainChunkSize}, + terrain::{block::Block, TerrainChunk, TerrainChunkSize}, vol::RectVolSize, ChatType, }; use hashbrown::HashMap; -use log::{info, log_enabled, warn}; +use log::warn; use std::{ net::SocketAddr, sync::Arc, @@ -398,6 +398,7 @@ impl Client { } } + /* // Output debug metrics if log_enabled!(log::Level::Info) && self.tick % 600 == 0 { let metrics = self @@ -407,6 +408,7 @@ impl Client { .fold(ChonkMetrics::default(), |a, (_, c)| a + c.get_metrics()); info!("{:?}", metrics); } + */ // 7) Finish the tick, pass control back to the frontend. 
self.tick += 1; diff --git a/common/src/terrain/chonk.rs b/common/src/terrain/chonk.rs index cd7b63b61d..1b1dfa6496 100644 --- a/common/src/terrain/chonk.rs +++ b/common/src/terrain/chonk.rs @@ -1,59 +1,60 @@ -use super::{block::Block, TerrainChunkMeta, TerrainChunkSize}; use crate::{ vol::{ - BaseVol, DefaultPosIterator, DefaultVolIterator, IntoPosIterator, IntoVolIterator, ReadVol, - RectRasterableVol, RectVolSize, VolSize, WriteVol, + BaseVol, IntoPosIterator, IntoVolIterator, ReadVol, RectRasterableVol, RectVolSize, + VolSize, Vox, WriteVol, }, - volumes::chunk::{Chunk, ChunkError}, + volumes::chunk::{Chunk, ChunkError, ChunkPosIter, ChunkVolIter}, }; -use hashbrown::HashMap; use serde_derive::{Deserialize, Serialize}; -use std::ops::Add; +use std::marker::PhantomData; use vek::*; #[derive(Debug)] pub enum ChonkError { - ChunkError(ChunkError), + SubChunkError(ChunkError), OutOfBounds, } -const SUB_CHUNK_HEIGHT: u32 = 16; - #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SubChunkSize; +pub struct SubChunkSize { + phantom: PhantomData, +} -impl VolSize for SubChunkSize { +// TODO (haslersn): Assert ChonkSize::RECT_SIZE.x == ChonkSize::RECT_SIZE.y + +impl VolSize for SubChunkSize { const SIZE: Vec3 = Vec3 { - x: TerrainChunkSize::RECT_SIZE.x, - y: TerrainChunkSize::RECT_SIZE.y, - z: SUB_CHUNK_HEIGHT, + x: ChonkSize::RECT_SIZE.x, + y: ChonkSize::RECT_SIZE.x, + z: ChonkSize::RECT_SIZE.x / 2, }; } -const SUB_CHUNK_HASH_LIMIT: usize = - (SubChunkSize::SIZE.x * SubChunkSize::SIZE.y * SubChunkSize::SIZE.z) as usize / 4; +type SubChunk = Chunk, M>; #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Chonk { +pub struct Chonk { z_offset: i32, - sub_chunks: Vec, - below: Block, - above: Block, - meta: TerrainChunkMeta, + sub_chunks: Vec>, + below: V, + above: V, + meta: M, + phantom: PhantomData, } -impl Chonk { - pub fn new(z_offset: i32, below: Block, above: Block, meta: TerrainChunkMeta) -> Self { +impl Chonk { + pub fn new(z_offset: i32, below: V, 
above: V, meta: M) -> Self { Self { z_offset, sub_chunks: Vec::new(), below, above, meta, + phantom: PhantomData, } } - pub fn meta(&self) -> &TerrainChunkMeta { + pub fn meta(&self) -> &M { &self.meta } @@ -62,68 +63,40 @@ impl Chonk { } pub fn get_max_z(&self) -> i32 { - self.z_offset + (self.sub_chunks.len() as u32 * SUB_CHUNK_HEIGHT) as i32 - } - - pub fn get_metrics(&self) -> ChonkMetrics { - ChonkMetrics { - chonks: 1, - homogeneous: self - .sub_chunks - .iter() - .filter(|s| match s { - SubChunk::Homogeneous(_) => true, - _ => false, - }) - .count(), - hash: self - .sub_chunks - .iter() - .filter(|s| match s { - SubChunk::Hash(_, _) => true, - _ => false, - }) - .count(), - heterogeneous: self - .sub_chunks - .iter() - .filter(|s| match s { - SubChunk::Heterogeneous(_) => true, - _ => false, - }) - .count(), - } + self.z_offset + (self.sub_chunks.len() as u32 * SubChunkSize::::SIZE.z) as i32 } // Returns the index (in self.sub_chunks) of the SubChunk that contains // layer z; note that this index changes when more SubChunks are prepended - fn sub_chunk_idx(&self, z: i32) -> usize { - ((z - self.z_offset) / SUB_CHUNK_HEIGHT as i32) as usize + fn sub_chunk_idx(&self, z: i32) -> i32 { + let diff = z - self.z_offset; + diff >> (SubChunkSize::::SIZE.z - 1).count_ones() } - // Returns the z_offset of the sub_chunk that contains layer z - fn sub_chunk_z_offset(&self, z: i32) -> i32 { - let rem = (z - self.z_offset) % SUB_CHUNK_HEIGHT as i32; - if rem < 0 { - z - (rem + SUB_CHUNK_HEIGHT as i32) - } else { - z - rem - } + // Converts a z coordinate into a local z coordinate within a sub chunk + fn sub_chunk_z(&self, z: i32) -> i32 { + let diff = z - self.z_offset; + diff & (SubChunkSize::::SIZE.z - 1) as i32 + } + + // Returns the z offset of the sub_chunk that contains layer z + fn sub_chunk_min_z(&self, z: i32) -> i32 { + z - self.sub_chunk_z(z) } } -impl BaseVol for Chonk { - type Vox = Block; +impl BaseVol for Chonk { + type Vox = V; type Error = ChonkError; } 
-impl RectRasterableVol for Chonk { - const RECT_SIZE: Vec2 = TerrainChunkSize::RECT_SIZE; +impl RectRasterableVol for Chonk { + const RECT_SIZE: Vec2 = S::RECT_SIZE; } -impl ReadVol for Chonk { +impl ReadVol for Chonk { #[inline(always)] - fn get(&self, pos: Vec3) -> Result<&Block, ChonkError> { + fn get(&self, pos: Vec3) -> Result<&V, Self::Error> { if pos.z < self.get_min_z() { // Below the terrain Ok(&self.below) @@ -132,162 +105,181 @@ impl ReadVol for Chonk { Ok(&self.above) } else { // Within the terrain - let sub_chunk_idx = self.sub_chunk_idx(pos.z); - - match &self.sub_chunks[sub_chunk_idx] { - // Can't fail - SubChunk::Homogeneous(block) => Ok(block), - SubChunk::Hash(cblock, map) => { - let rpos = pos - - Vec3::unit_z() - * (self.z_offset + sub_chunk_idx as i32 * SUB_CHUNK_HEIGHT as i32); - - Ok(map.get(&rpos.map(|e| e as u8)).unwrap_or(cblock)) - } - SubChunk::Heterogeneous(chunk) => { - let rpos = pos - - Vec3::unit_z() - * (self.z_offset + sub_chunk_idx as i32 * SUB_CHUNK_HEIGHT as i32); - - chunk.get(rpos).map_err(ChonkError::ChunkError) - } - } + let rpos = pos + - Vec3::unit_z() + * (self.z_offset + sub_chunk_idx * SubChunkSize::::SIZE.z as i32); + self.sub_chunks[sub_chunk_idx as usize] + .get(rpos) + .map_err(Self::Error::SubChunkError) } } } -impl WriteVol for Chonk { +impl WriteVol for Chonk { #[inline(always)] - fn set(&mut self, pos: Vec3, block: Block) -> Result<(), ChonkError> { + fn set(&mut self, pos: Vec3, block: Self::Vox) -> Result<(), Self::Error> { + let mut sub_chunk_idx = self.sub_chunk_idx(pos.z); + if pos.z < self.get_min_z() { // Prepend exactly sufficiently many SubChunks via Vec::splice - let target_z_offset = self.sub_chunk_z_offset(pos.z); - let c = SubChunk::Homogeneous(self.below); - let n = (self.get_min_z() - target_z_offset) / SUB_CHUNK_HEIGHT as i32; - self.sub_chunks - .splice(0..0, std::iter::repeat(c).take(n as usize)); - self.z_offset = target_z_offset; + let c = Chunk::, M>::filled(self.below.clone(), 
self.meta.clone()); + let n = (-sub_chunk_idx) as usize; + self.sub_chunks.splice(0..0, std::iter::repeat(c).take(n)); + self.z_offset += sub_chunk_idx * SubChunkSize::::SIZE.z as i32; + sub_chunk_idx = 0; } else if pos.z >= self.get_max_z() { // Append exactly sufficiently many SubChunks via Vec::extend - let target_z_offset = self.sub_chunk_z_offset(pos.z); - let c = SubChunk::Homogeneous(self.above); - let n = (target_z_offset - self.get_max_z()) / SUB_CHUNK_HEIGHT as i32 + 1; - self.sub_chunks - .extend(std::iter::repeat(c).take(n as usize)); + let c = Chunk::, M>::filled(self.above.clone(), self.meta.clone()); + let n = 1 + sub_chunk_idx as usize - self.sub_chunks.len(); + self.sub_chunks.extend(std::iter::repeat(c).take(n)); } - let sub_chunk_idx = self.sub_chunk_idx(pos.z); + let rpos = pos + - Vec3::unit_z() * (self.z_offset + sub_chunk_idx * SubChunkSize::::SIZE.z as i32); + self.sub_chunks[sub_chunk_idx as usize] // TODO (haslersn): self.sub_chunks.get(...).and_then(...) + .set(rpos, block) + .map_err(Self::Error::SubChunkError) + } +} - let rpos = - pos - Vec3::unit_z() * (self.z_offset + sub_chunk_idx as i32 * SUB_CHUNK_HEIGHT as i32); +struct ChonkIterHelper { + sub_chunk_min_z: i32, + lower_bound: Vec3, + upper_bound: Vec3, + phantom: PhantomData>, +} - match &mut self.sub_chunks[sub_chunk_idx] { - // Can't fail - SubChunk::Homogeneous(cblock) if block == *cblock => Ok(()), - SubChunk::Homogeneous(cblock) => { - let mut map = HashMap::default(); - map.insert(rpos.map(|e| e as u8), block); +impl Iterator for ChonkIterHelper { + type Item = (i32, Vec3, Vec3); - self.sub_chunks[sub_chunk_idx] = SubChunk::Hash(*cblock, map); - Ok(()) - } - SubChunk::Hash(cblock, map) if block == *cblock => { - map.remove(&rpos.map(|e| e as u8)); - Ok(()) - } - SubChunk::Hash(_cblock, map) if map.len() < SUB_CHUNK_HASH_LIMIT => { - map.insert(rpos.map(|e| e as u8), block); - Ok(()) - } - SubChunk::Hash(cblock, map) => { - let mut new_chunk = Chunk::filled(*cblock, ()); - 
for (map_pos, map_block) in map { - new_chunk - .set(map_pos.map(|e| i32::from(e)), *map_block) - .unwrap(); // Can't fail (I hope!) + #[inline(always)] + fn next(&mut self) -> Option { + if self.lower_bound.z >= self.upper_bound.z { + return None; + } + let mut lb = self.lower_bound; + let mut ub = self.upper_bound; + let current_min_z = self.sub_chunk_min_z; + lb.z -= current_min_z; + ub.z -= current_min_z; + ub.z = std::cmp::min(ub.z, SubChunkSize::::SIZE.z as i32); + self.sub_chunk_min_z += SubChunkSize::::SIZE.z as i32; + self.lower_bound.z = self.sub_chunk_min_z; + Some((current_min_z, lb, ub)) + } +} + +pub struct ChonkPosIter { + outer: ChonkIterHelper, + opt_inner: Option<(i32, ChunkPosIter, M>)>, +} + +impl Iterator for ChonkPosIter { + type Item = Vec3; + + #[inline(always)] + fn next(&mut self) -> Option { + loop { + if let Some((sub_chunk_min_z, ref mut inner)) = self.opt_inner { + if let Some(mut pos) = inner.next() { + pos.z += sub_chunk_min_z; + return Some(pos); } - - new_chunk.set(rpos, block).unwrap(); // Can't fail (I hope) - - self.sub_chunks[sub_chunk_idx] = SubChunk::Heterogeneous(new_chunk); - Ok(()) } - - /* - SubChunk::Homogeneous(cblock) => { - let mut new_chunk = Chunk::filled(*cblock, ()); - - new_chunk.set(rpos, block).unwrap(); // Can't fail (I hope!) 
- - self.sub_chunks[sub_chunk_idx] = SubChunk::Heterogeneous(new_chunk); - Ok(()) + match self.outer.next() { + None => return None, + Some((sub_chunk_min_z, lb, ub)) => { + self.opt_inner = Some((sub_chunk_min_z, SubChunk::::pos_iter(lb, ub))) + } } - */ - SubChunk::Heterogeneous(chunk) => { - chunk.set(rpos, block).map_err(ChonkError::ChunkError) - } //_ => unimplemented!(), } } } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum SubChunk { - Homogeneous(Block), - Hash(Block, HashMap, Block>), - Heterogeneous(Chunk), +enum InnerChonkVolIter<'a, V: Vox, S: RectVolSize, M: Clone> { + Vol(ChunkVolIter<'a, V, SubChunkSize, M>), + Pos(ChunkPosIter, M>), } -impl SubChunk { - pub fn filled(block: Block) -> Self { - SubChunk::Homogeneous(block) - } +pub struct ChonkVolIter<'a, V: Vox, S: RectVolSize, M: Clone> { + chonk: &'a Chonk, + outer: ChonkIterHelper, + opt_inner: Option<(i32, InnerChonkVolIter<'a, V, S, M>)>, } -#[derive(Debug)] -pub struct ChonkMetrics { - chonks: usize, - homogeneous: usize, - hash: usize, - heterogeneous: usize, -} +impl<'a, V: Vox, S: RectVolSize, M: Clone> Iterator for ChonkVolIter<'a, V, S, M> { + type Item = (Vec3, &'a V); -impl Default for ChonkMetrics { - fn default() -> Self { - ChonkMetrics { - chonks: 0, - homogeneous: 0, - hash: 0, - heterogeneous: 0, + #[inline(always)] + fn next(&mut self) -> Option { + loop { + if let Some((sub_chunk_min_z, ref mut inner)) = self.opt_inner { + let got = match inner { + InnerChonkVolIter::<'a, V, S, M>::Vol(iter) => iter.next(), + InnerChonkVolIter::<'a, V, S, M>::Pos(iter) => iter.next().map(|pos| { + if sub_chunk_min_z < self.chonk.get_min_z() { + (pos, &self.chonk.below) + } else { + (pos, &self.chonk.above) + } + }), + }; + if let Some((mut pos, vox)) = got { + pos.z += sub_chunk_min_z; + return Some((pos, vox)); + } + } + match self.outer.next() { + None => return None, + Some((sub_chunk_min_z, lb, ub)) => { + let inner = if sub_chunk_min_z < self.chonk.get_min_z() + || sub_chunk_min_z 
>= self.chonk.get_max_z() + { + InnerChonkVolIter::<'a, V, S, M>::Pos(SubChunk::::pos_iter(lb, ub)) + } else { + InnerChonkVolIter::<'a, V, S, M>::Vol( + self.chonk.sub_chunks + [self.chonk.sub_chunk_idx(sub_chunk_min_z) as usize] + .vol_iter(lb, ub), + ) + }; + self.opt_inner = Some((sub_chunk_min_z, inner)); + } + } } } } -impl Add for ChonkMetrics { - type Output = Self; - - fn add(self, other: Self::Output) -> Self { - Self::Output { - chonks: self.chonks + other.chonks, - homogeneous: self.homogeneous + other.homogeneous, - hash: self.hash + other.hash, - heterogeneous: self.heterogeneous + other.heterogeneous, - } - } -} - -impl<'a> IntoPosIterator for &'a Chonk { - type IntoIter = DefaultPosIterator; +impl<'a, V: Vox, S: RectVolSize, M: Clone> IntoPosIterator for &'a Chonk { + type IntoIter = ChonkPosIter; fn pos_iter(self, lower_bound: Vec3, upper_bound: Vec3) -> Self::IntoIter { - DefaultPosIterator::new(lower_bound, upper_bound) + Self::IntoIter { + outer: ChonkIterHelper:: { + sub_chunk_min_z: self.sub_chunk_min_z(lower_bound.z), + lower_bound, + upper_bound, + phantom: PhantomData, + }, + opt_inner: None, + } } } -impl<'a> IntoVolIterator<'a> for &'a Chonk { - type IntoIter = DefaultVolIterator<'a, Chonk>; +impl<'a, V: Vox, S: RectVolSize, M: Clone> IntoVolIterator<'a> for &'a Chonk { + type IntoIter = ChonkVolIter<'a, V, S, M>; fn vol_iter(self, lower_bound: Vec3, upper_bound: Vec3) -> Self::IntoIter { - DefaultVolIterator::new(self, lower_bound, upper_bound) + Self::IntoIter { + chonk: self, + outer: ChonkIterHelper:: { + sub_chunk_min_z: self.sub_chunk_min_z(lower_bound.z), + lower_bound, + upper_bound, + phantom: PhantomData, + }, + opt_inner: None, + } } } diff --git a/common/src/terrain/mod.rs b/common/src/terrain/mod.rs index 43bb15ebf3..d8e6e0e62e 100644 --- a/common/src/terrain/mod.rs +++ b/common/src/terrain/mod.rs @@ -57,5 +57,5 @@ impl TerrainChunkMeta { // Terrain type aliases -pub type TerrainChunk = chonk::Chonk; +pub type TerrainChunk = 
chonk::Chonk; pub type TerrainGrid = VolGrid2d; diff --git a/common/src/volumes/chunk.rs b/common/src/volumes/chunk.rs index dbe0f069b9..0c8a1c4094 100644 --- a/common/src/volumes/chunk.rs +++ b/common/src/volumes/chunk.rs @@ -1,5 +1,8 @@ -use crate::vol::{BaseVol, ReadVol, SizedVol, VolSize, Vox, WriteVol}; +use crate::vol::{ + BaseVol, IntoPosIterator, IntoVolIterator, RasterableVol, ReadVol, VolSize, Vox, WriteVol, +}; use serde_derive::{Deserialize, Serialize}; +use std::iter::Iterator; use std::marker::PhantomData; use vek::*; @@ -8,81 +11,105 @@ pub enum ChunkError { OutOfBounds, } -/// A volume with dimensions known at compile-time. -// V = Voxel -// S = Size (replace when const generics are a thing) -// M = Metadata +/// The volume is spatially subdivided into groups of `4*4*4` blocks. Since a +/// `Chunk` is of total size `32*32*16`, this implies that there are `8*8*4` +/// groups. (These numbers are generic in the actual code such that there are +/// always `256` groups. I.e. the group size is chosen depending on the desired +/// total size of the `Chunk`.) +/// +/// There's a single vector `self.vox` which consecutively stores these groups. +/// Each group might or might not be contained in `self.vox`. A group that is +/// not contained represents that the full group consists only of `self.default` +/// voxels. This saves a lot of memory because oftentimes a `Chunk` consists of +/// either a lot of air or a lot of stone. +/// +/// To track whether a group is contained in `self.vox`, there's an index buffer +/// `self.indices : [u8; 256]`. It contains for each group +/// +/// * (a) the order in which it has been inserted into `self.vox`, if the group +/// is contained in `self.vox` or +/// * (b) 255, otherwise. That case represents that the whole group consists +/// only of `self.default` voxels. +/// +/// (Note that 255 is a valid insertion order for case (a) only if `self.vox` is +/// full and then no other group has the index 255. 
Therefore there's no +/// ambiguity.) +/// +/// ## Rationale: +/// +/// The index buffer should be small because: +/// +/// * Small size increases the probability that it will always be in cache. +/// * The index buffer is allocated for every `Chunk` and an almost empty `Chunk` +/// shall not consume too much memory. +/// +/// The number of 256 groups is particularly nice because it means that the index +/// buffer can consist of `u8`s. This keeps the space requirement for the index +/// buffer as low as 4 cache lines. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Chunk { + indices: Vec, // TODO (haslersn): Box<[u8; S::SIZE.x * S::SIZE.y * S::SIZE.z]>, this is however not possible in Rust yet vox: Vec, + default: V, meta: M, phantom: PhantomData, } impl Chunk { - /// Used to transform a voxel position in the volume into its corresponding index - /// in the voxel array. - #[inline(always)] - fn idx_for(pos: Vec3) -> Option { - if pos.map(|e| e >= 0).reduce_and() - && pos.map2(S::SIZE, |e, lim| e < lim as i32).reduce_and() - { - Some(Self::idx_for_unchecked(pos)) - } else { - None - } - } + const VOLUME: u32 = (S::SIZE.x * S::SIZE.y * S::SIZE.z) as u32; + const GROUP_VOLUME: u32 = [Self::VOLUME / 256, 1][(Self::VOLUME < 256) as usize]; + /// `GROUP_COUNT_TOTAL` is always `256`, except if `VOLUME < 256` + const GROUP_COUNT_TOTAL: u32 = Self::VOLUME / Self::GROUP_VOLUME; + const GROUP_LONG_SIDE_LEN: u32 = 1 << ((Self::GROUP_VOLUME * 4 - 1).count_ones() / 3); + const GROUP_SIZE: Vec3 = Vec3::new( + Self::GROUP_LONG_SIDE_LEN, + Self::GROUP_LONG_SIDE_LEN, + Self::GROUP_VOLUME / (Self::GROUP_LONG_SIDE_LEN * Self::GROUP_LONG_SIDE_LEN), + ); + const GROUP_COUNT: Vec3 = Vec3::new( + S::SIZE.x / Self::GROUP_SIZE.x, + S::SIZE.y / Self::GROUP_SIZE.y, + S::SIZE.z / Self::GROUP_SIZE.z, + ); - /// Used to transform a voxel position in the volume into its corresponding index - /// in the voxel array. 
- #[inline(always)] - fn idx_for_unchecked(pos: Vec3) -> usize { - (pos.x * S::SIZE.y as i32 * S::SIZE.z as i32 + pos.y * S::SIZE.z as i32 + pos.z) as usize - } -} + /// Creates a new `Chunk` with the provided dimensions and all voxels filled + /// with duplicates of the provided voxel. + pub fn filled(default: V, meta: M) -> Self { + // TODO (haslersn): Alter into compile time assertions + // + // An extent is valid if it fulfils the following conditions. + // + // 1. In each direction, the extent is a power of two. + // 2. In each direction, the group size is in [1, 256]. + // 3. In each direction, the group count is in [1, 256]. + // + // Rationales: + // + // 1. We have code in the implementation that assumes it. In particular, + // code using `.count_ones()`. + // 2. The maximum group size is `256x256x256`, because there's code that + // stores group relative indices as `u8`. + // 3. There's code that stores group indices as `u8`. + debug_assert!(S::SIZE.x.is_power_of_two()); + debug_assert!(S::SIZE.y.is_power_of_two()); + debug_assert!(S::SIZE.z.is_power_of_two()); + debug_assert!(0 < Self::GROUP_SIZE.x); + debug_assert!(0 < Self::GROUP_SIZE.y); + debug_assert!(0 < Self::GROUP_SIZE.z); + debug_assert!(Self::GROUP_SIZE.x <= 256); + debug_assert!(Self::GROUP_SIZE.y <= 256); + debug_assert!(Self::GROUP_SIZE.z <= 256); + debug_assert!(0 < Self::GROUP_COUNT.x); + debug_assert!(0 < Self::GROUP_COUNT.y); + debug_assert!(0 < Self::GROUP_COUNT.z); + debug_assert!(Self::GROUP_COUNT.x <= 256); + debug_assert!(Self::GROUP_COUNT.y <= 256); + debug_assert!(Self::GROUP_COUNT.z <= 256); -impl BaseVol for Chunk { - type Vox = V; - type Error = ChunkError; -} - -impl SizedVol for Chunk { - #[inline(always)] - fn lower_bound(&self) -> Vec3 { - Vec3::zero() - } - - #[inline(always)] - fn upper_bound(&self) -> Vec3 { - S::SIZE.map(|e| e as i32) - } -} - -impl ReadVol for Chunk { - #[inline(always)] - fn get(&self, pos: Vec3) -> Result<&V, ChunkError> { - Self::idx_for(pos) - 
.and_then(|idx| self.vox.get(idx)) - .ok_or(ChunkError::OutOfBounds) - } -} - -impl WriteVol for Chunk { - #[inline(always)] - fn set(&mut self, pos: Vec3, vox: Self::Vox) -> Result<(), ChunkError> { - Self::idx_for(pos) - .and_then(|idx| self.vox.get_mut(idx)) - .map(|old_vox| *old_vox = vox) - .ok_or(ChunkError::OutOfBounds) - } -} - -impl Chunk { - /// Create a new `Chunk` with the provided dimensions and all voxels filled with duplicates of - /// the provided voxel. - pub fn filled(vox: V, meta: M) -> Self { Self { - vox: vec![vox; S::SIZE.product() as usize], + indices: vec![255; Self::GROUP_COUNT_TOTAL as usize], + vox: Vec::new(), + default, meta, phantom: PhantomData, } @@ -97,4 +124,225 @@ impl Chunk { pub fn metadata_mut(&mut self) -> &mut M { &mut self.meta } + + #[inline(always)] + fn grp_idx(pos: Vec3) -> u32 { + let grp_pos = pos.map2(Self::GROUP_SIZE, |e, s| e as u32 / s); + (grp_pos.z * (Self::GROUP_COUNT.y * Self::GROUP_COUNT.x)) + + (grp_pos.y * Self::GROUP_COUNT.x) + + (grp_pos.x) + } + + #[inline(always)] + fn rel_idx(pos: Vec3) -> u32 { + let rel_pos = pos.map2(Self::GROUP_SIZE, |e, s| e as u32 % s); + (rel_pos.z * (Self::GROUP_SIZE.y * Self::GROUP_SIZE.x)) + + (rel_pos.y * Self::GROUP_SIZE.x) + + (rel_pos.x) + } + + #[inline(always)] + fn idx_unchecked(&self, pos: Vec3) -> Option { + let grp_idx = Self::grp_idx(pos); + let rel_idx = Self::rel_idx(pos); + let base = self.indices[grp_idx as usize]; + let num_groups = self.vox.len() as u32 / Self::GROUP_VOLUME; + if base as u32 >= num_groups { + None + } else { + Some((base as u32 * Self::GROUP_VOLUME + rel_idx) as usize) + } + } + + #[inline(always)] + fn force_idx_unchecked(&mut self, pos: Vec3) -> usize { + let grp_idx = Self::grp_idx(pos); + let rel_idx = Self::rel_idx(pos); + let base = &mut self.indices[grp_idx as usize]; + let num_groups = self.vox.len() as u32 / Self::GROUP_VOLUME; + if *base as u32 >= num_groups { + *base = num_groups as u8; + self.vox + 
.extend(std::iter::repeat(self.default.clone()).take(Self::GROUP_VOLUME as usize)); + } + (*base as u32 * Self::GROUP_VOLUME + rel_idx) as usize + } + + #[inline(always)] + fn get_unchecked(&self, pos: Vec3) -> &V { + match self.idx_unchecked(pos) { + Some(idx) => &self.vox[idx], + None => &self.default, + } + } + + #[inline(always)] + fn set_unchecked(&mut self, pos: Vec3, vox: V) { + if vox != self.default { + let idx = self.force_idx_unchecked(pos); + self.vox[idx] = vox; + } else if let Some(idx) = self.idx_unchecked(pos) { + self.vox[idx] = vox; + } + } +} + +impl BaseVol for Chunk { + type Vox = V; + type Error = ChunkError; +} + +impl RasterableVol for Chunk { + const SIZE: Vec3 = S::SIZE; +} + +impl ReadVol for Chunk { + #[inline(always)] + fn get(&self, pos: Vec3) -> Result<&Self::Vox, Self::Error> { + if !pos + .map2(S::SIZE, |e, s| 0 <= e && e < s as i32) + .reduce_and() + { + Err(Self::Error::OutOfBounds) + } else { + Ok(self.get_unchecked(pos)) + } + } +} + +impl WriteVol for Chunk { + #[inline(always)] + fn set(&mut self, pos: Vec3, vox: Self::Vox) -> Result<(), Self::Error> { + if !pos + .map2(S::SIZE, |e, s| 0 <= e && e < s as i32) + .reduce_and() + { + Err(Self::Error::OutOfBounds) + } else { + Ok(self.set_unchecked(pos, vox)) + } + } +} + +pub struct ChunkPosIter { + // Store as `u8`s so as to reduce memory footprint. + lb: Vec3, + ub: Vec3, + pos: Vec3, + phantom: PhantomData>, +} + +impl ChunkPosIter { + fn new(lower_bound: Vec3, upper_bound: Vec3) -> Self { + // If the range is empty, then we have the special case `ub = lower_bound`. 
+ let ub = if lower_bound.map2(upper_bound, |l, u| l < u).reduce_and() { + upper_bound + } else { + lower_bound + }; + Self { + lb: lower_bound, + ub, + pos: lower_bound, + phantom: PhantomData, + } + } +} + +impl Iterator for ChunkPosIter { + type Item = Vec3; + + #[inline(always)] + fn next(&mut self) -> Option { + if self.pos.z >= self.ub.z { + return None; + } + let res = Some(self.pos); + + self.pos.x += 1; + if self.pos.x != self.ub.x && self.pos.x % Chunk::::GROUP_SIZE.x as i32 != 0 { + return res; + } + self.pos.x = std::cmp::max( + self.lb.x, + (self.pos.x - 1) & !(Chunk::::GROUP_SIZE.x as i32 - 1), + ); + + self.pos.y += 1; + if self.pos.y != self.ub.y && self.pos.y % Chunk::::GROUP_SIZE.y as i32 != 0 { + return res; + } + self.pos.y = std::cmp::max( + self.lb.y, + (self.pos.y - 1) & !(Chunk::::GROUP_SIZE.y as i32 - 1), + ); + + self.pos.z += 1; + if self.pos.z != self.ub.z && self.pos.z % Chunk::::GROUP_SIZE.z as i32 != 0 { + return res; + } + self.pos.z = std::cmp::max( + self.lb.z, + (self.pos.z - 1) & !(Chunk::::GROUP_SIZE.z as i32 - 1), + ); + + self.pos.x = (self.pos.x | (Chunk::::GROUP_SIZE.x as i32 - 1)) + 1; + if self.pos.x < self.ub.x { + return res; + } + self.pos.x = self.lb.x; + + self.pos.y = (self.pos.y | (Chunk::::GROUP_SIZE.y as i32 - 1)) + 1; + if self.pos.y < self.ub.y { + return res; + } + self.pos.y = self.lb.y; + + self.pos.z = (self.pos.z | (Chunk::::GROUP_SIZE.z as i32 - 1)) + 1; + + res + } +} + +pub struct ChunkVolIter<'a, V: Vox, S: VolSize, M> { + chunk: &'a Chunk, + iter_impl: ChunkPosIter, +} + +impl<'a, V: Vox, S: VolSize, M> Iterator for ChunkVolIter<'a, V, S, M> { + type Item = (Vec3, &'a V); + + #[inline(always)] + fn next(&mut self) -> Option { + self.iter_impl + .next() + .map(|pos| (pos, self.chunk.get_unchecked(pos))) + } +} + +impl Chunk { + /// It's possible to obtain a positional iterator without having a `Chunk` + /// instance. 
+ pub fn pos_iter(lower_bound: Vec3, upper_bound: Vec3) -> ChunkPosIter { + ChunkPosIter::::new(lower_bound, upper_bound) + } +} + +impl<'a, V: Vox, S: VolSize, M> IntoPosIterator for &'a Chunk { + type IntoIter = ChunkPosIter; + + fn pos_iter(self, lower_bound: Vec3, upper_bound: Vec3) -> Self::IntoIter { + Chunk::::pos_iter(lower_bound, upper_bound) + } +} + +impl<'a, V: Vox, S: VolSize, M> IntoVolIterator<'a> for &'a Chunk { + type IntoIter = ChunkVolIter<'a, V, S, M>; + + fn vol_iter(self, lower_bound: Vec3, upper_bound: Vec3) -> Self::IntoIter { + ChunkVolIter::<'a, V, S, M> { + chunk: self, + iter_impl: ChunkPosIter::::new(lower_bound, upper_bound), + } + } }