mirror of
https://gitlab.com/veloren/veloren.git
synced 2024-08-30 18:12:32 +00:00
b26043b0e6
Previously, voxels in sparsely populated chunks were stored in a `HashMap`. However, during usage oftentimes block accesses are followed by subsequent nearby voxel accesses. Therefore it's possible to provide cache friendliness, but not with `HashMap`. The previous merge request [!469](https://gitlab.com/veloren/veloren/merge_requests/469) proposed to order voxels by their morton order (see https://en.wikipedia.org/wiki/Z-order_curve ). This provided excellent cache friendliness. However, benchmarks showed that the required indexing calculations are quite expensive. Particular results on my _Intel(R) Core(TM) i7-7500U CPU @ 2.70 GHz_ were: | Benchmark | Before this commit @d322384bec
| Morton Order @ec8a7caf42
| This commit | | ---------------------------------------- | --------------------------------- | --------------------------- | -------------------- | | `full read` (81920 voxels) | 17.7ns per voxel | 8.9ns per voxel | **3.6ns** per voxel | | `constrained read` (4913 voxels) | 67.0ns per voxel | 40.1ns per voxel | **14.1ns** per voxel | | `local read` (125 voxels) | 17.5ns per voxel | 14.7ns per voxel | **3.8ns** per voxel | | `X-direction read` (17 voxels) | 17.8ns per voxel | 25.9ns per voxel | **4.2ns** per voxel | | `Y-direction read` (17 voxels) | 18.4ns per voxel | 33.3ns per voxel | **4.5ns** per voxel | | `Z-direction read` (17 voxels) | 18.6ns per voxel | 38.2ns per voxel | **5.4ns** per voxel | | `long Z-direction read` (65 voxels) | 18.0ns per voxel | 37.7ns per voxel | **5.1ns** per voxel | | `full write (dense)` (81920 voxels) | 17.9ns per voxel | **10.3ns** per voxel | 12.4ns per voxel | This commit (instead of utilizing morton order) replaces `HashMap` in the `Chunk` implementation by the following data structure: The volume is spatially subdivided into groups of `4*4*4` blocks. Since a `Chunk` is of total size `32*32*16`, this implies that there are `8*8*4` groups. (These numbers are generic in the actual code such that there are always `256` groups. I.e. the group size is chosen depending on the desired total size of the `Chunk`.) There's a single vector `self.vox` which consecutively stores these groups. Each group might or might not be contained in `self.vox`. A group that is not contained represents that the full group consists only of `self.default` voxels. This saves a lot of memory because oftentimes a `Chunk` consists of either a lot of air or a lot of stone. To track whether a group is contained in `self.vox`, there's an index buffer `self.indices : [u8; 256]`. It contains for each group * (a) the order in which it has been inserted into `self.vox`, if the group is contained in `self.vox` or * (b) 255, otherwise. 
That case represents that the whole group consists only of `self.default` voxels. (Note that 255 is a valid insertion order for case (a) only if `self.vox` is full and then no other group has the index 255. Therefore there's no ambiguity.) Rationale: The index buffer should be small because: * Small size increases the probability that it will always be in cache. * The index buffer is allocated for every `Chunk` and an almost empty `Chunk` shall not consume too much memory. The number of 256 groups is particularly nice because it means that the index buffer can consist of `u8`s. This keeps the space requirement for the index buffer as low as 4 cache lines.
286 lines
9.5 KiB
Rust
286 lines
9.5 KiB
Rust
use crate::{
|
|
vol::{
|
|
BaseVol, IntoPosIterator, IntoVolIterator, ReadVol, RectRasterableVol, RectVolSize,
|
|
VolSize, Vox, WriteVol,
|
|
},
|
|
volumes::chunk::{Chunk, ChunkError, ChunkPosIter, ChunkVolIter},
|
|
};
|
|
use serde_derive::{Deserialize, Serialize};
|
|
use std::marker::PhantomData;
|
|
use vek::*;
|
|
|
|
/// Errors that operations on a [`Chonk`] can produce.
#[derive(Debug)]
pub enum ChonkError {
    /// An error propagated from the underlying `Chunk` implementation.
    SubChunkError(ChunkError),
    /// The requested position is out of bounds.
    /// NOTE(review): not constructed anywhere in this file — presumably used
    /// by other impls of this volume; confirm against callers.
    OutOfBounds,
}
|
|
|
|
/// Zero-sized marker type describing the dimensions of a sub-chunk, derived
/// from the horizontal extent of the enclosing chonk (see the `VolSize` impl).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubChunkSize<ChonkSize: RectVolSize> {
    // Carries the ChonkSize type parameter without storing any data.
    phantom: PhantomData<ChonkSize>,
}
|
|
|
|
// TODO (haslersn): Assert ChonkSize::RECT_SIZE.x == ChonkSize::RECT_SIZE.y
|
|
|
|
impl<ChonkSize: RectVolSize> VolSize for SubChunkSize<ChonkSize> {
    // Sub-chunks are square in x/y and half as tall as they are wide.
    const SIZE: Vec3<u32> = Vec3 {
        x: ChonkSize::RECT_SIZE.x,
        // NOTE: y deliberately reuses RECT_SIZE.x — RECT_SIZE.x ==
        // RECT_SIZE.y is assumed (see the TODO above this impl).
        y: ChonkSize::RECT_SIZE.x,
        // The shift/mask arithmetic in Chonk assumes this is a power of two.
        z: ChonkSize::RECT_SIZE.x / 2,
    };
}
|
|
|
|
/// A `Chunk` sized to act as one vertical slab of a [`Chonk`].
type SubChunk<V, S, M> = Chunk<V, SubChunkSize<S>, M>;
|
|
|
|
/// A vertically unbounded volume: a stack of `SubChunk`s covering the
/// occupied z range, with implicit uniform fill voxels below and above it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chonk<V: Vox, S: RectVolSize, M: Clone> {
    // z coordinate (in voxels) of the bottom of sub_chunks[0]; decreases when
    // sub-chunks are prepended by `WriteVol::set`.
    z_offset: i32,
    // Vertically stacked sub-chunks, lowest first.
    sub_chunks: Vec<SubChunk<V, S, M>>,
    // Voxel implicitly filling all space below `z_offset`.
    below: V,
    // Voxel implicitly filling all space at or above `get_max_z()`.
    above: V,
    // Per-chonk metadata; also cloned into newly allocated sub-chunks.
    meta: M,
    // Carries the size type parameter without storing any data.
    phantom: PhantomData<S>,
}
|
|
|
|
impl<V: Vox, S: RectVolSize, M: Clone> Chonk<V, S, M> {
|
|
pub fn new(z_offset: i32, below: V, above: V, meta: M) -> Self {
|
|
Self {
|
|
z_offset,
|
|
sub_chunks: Vec::new(),
|
|
below,
|
|
above,
|
|
meta,
|
|
phantom: PhantomData,
|
|
}
|
|
}
|
|
|
|
pub fn meta(&self) -> &M {
|
|
&self.meta
|
|
}
|
|
|
|
pub fn get_min_z(&self) -> i32 {
|
|
self.z_offset
|
|
}
|
|
|
|
pub fn get_max_z(&self) -> i32 {
|
|
self.z_offset + (self.sub_chunks.len() as u32 * SubChunkSize::<S>::SIZE.z) as i32
|
|
}
|
|
|
|
// Returns the index (in self.sub_chunks) of the SubChunk that contains
|
|
// layer z; note that this index changes when more SubChunks are prepended
|
|
fn sub_chunk_idx(&self, z: i32) -> i32 {
|
|
let diff = z - self.z_offset;
|
|
diff >> (SubChunkSize::<S>::SIZE.z - 1).count_ones()
|
|
}
|
|
|
|
// Converts a z coordinate into a local z coordinate within a sub chunk
|
|
fn sub_chunk_z(&self, z: i32) -> i32 {
|
|
let diff = z - self.z_offset;
|
|
diff & (SubChunkSize::<S>::SIZE.z - 1) as i32
|
|
}
|
|
|
|
// Returns the z offset of the sub_chunk that contains layer z
|
|
fn sub_chunk_min_z(&self, z: i32) -> i32 {
|
|
z - self.sub_chunk_z(z)
|
|
}
|
|
}
|
|
|
|
impl<V: Vox, S: RectVolSize, M: Clone> BaseVol for Chonk<V, S, M> {
    // The voxel type stored in (and implicitly filling) this volume.
    type Vox = V;
    type Error = ChonkError;
}
|
|
|
|
impl<V: Vox, S: RectVolSize, M: Clone> RectRasterableVol for Chonk<V, S, M> {
    // The chonk's horizontal footprint is exactly the size parameter's.
    const RECT_SIZE: Vec2<u32> = S::RECT_SIZE;
}
|
|
|
|
impl<V: Vox, S: RectVolSize, M: Clone> ReadVol for Chonk<V, S, M> {
|
|
#[inline(always)]
|
|
fn get(&self, pos: Vec3<i32>) -> Result<&V, Self::Error> {
|
|
if pos.z < self.get_min_z() {
|
|
// Below the terrain
|
|
Ok(&self.below)
|
|
} else if pos.z >= self.get_max_z() {
|
|
// Above the terrain
|
|
Ok(&self.above)
|
|
} else {
|
|
// Within the terrain
|
|
let sub_chunk_idx = self.sub_chunk_idx(pos.z);
|
|
let rpos = pos
|
|
- Vec3::unit_z()
|
|
* (self.z_offset + sub_chunk_idx * SubChunkSize::<S>::SIZE.z as i32);
|
|
self.sub_chunks[sub_chunk_idx as usize]
|
|
.get(rpos)
|
|
.map_err(Self::Error::SubChunkError)
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<V: Vox, S: RectVolSize, M: Clone> WriteVol for Chonk<V, S, M> {
|
|
#[inline(always)]
|
|
fn set(&mut self, pos: Vec3<i32>, block: Self::Vox) -> Result<(), Self::Error> {
|
|
let mut sub_chunk_idx = self.sub_chunk_idx(pos.z);
|
|
|
|
if pos.z < self.get_min_z() {
|
|
// Prepend exactly sufficiently many SubChunks via Vec::splice
|
|
let c = Chunk::<V, SubChunkSize<S>, M>::filled(self.below.clone(), self.meta.clone());
|
|
let n = (-sub_chunk_idx) as usize;
|
|
self.sub_chunks.splice(0..0, std::iter::repeat(c).take(n));
|
|
self.z_offset += sub_chunk_idx * SubChunkSize::<S>::SIZE.z as i32;
|
|
sub_chunk_idx = 0;
|
|
} else if pos.z >= self.get_max_z() {
|
|
// Append exactly sufficiently many SubChunks via Vec::extend
|
|
let c = Chunk::<V, SubChunkSize<S>, M>::filled(self.above.clone(), self.meta.clone());
|
|
let n = 1 + sub_chunk_idx as usize - self.sub_chunks.len();
|
|
self.sub_chunks.extend(std::iter::repeat(c).take(n));
|
|
}
|
|
|
|
let rpos = pos
|
|
- Vec3::unit_z() * (self.z_offset + sub_chunk_idx * SubChunkSize::<S>::SIZE.z as i32);
|
|
self.sub_chunks[sub_chunk_idx as usize] // TODO (haslersn): self.sub_chunks.get(...).and_then(...)
|
|
.set(rpos, block)
|
|
.map_err(Self::Error::SubChunkError)
|
|
}
|
|
}
|
|
|
|
// Walks the z range [lower_bound.z, upper_bound.z) one sub-chunk slab at a
// time; its Iterator impl yields each slab's bounds in slab-local coordinates.
struct ChonkIterHelper<V: Vox, S: RectVolSize, M: Clone> {
    // Minimum z (global) of the slab that the next `next()` call will yield.
    sub_chunk_min_z: i32,
    // Remaining global-space bounds; lower_bound.z advances as slabs are consumed.
    lower_bound: Vec3<i32>,
    upper_bound: Vec3<i32>,
    // Ties the helper to the chonk's type parameters without borrowing one.
    phantom: PhantomData<Chonk<V, S, M>>,
}
|
|
|
|
impl<V: Vox, S: RectVolSize, M: Clone> Iterator for ChonkIterHelper<V, S, M> {
|
|
type Item = (i32, Vec3<i32>, Vec3<i32>);
|
|
|
|
#[inline(always)]
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
if self.lower_bound.z >= self.upper_bound.z {
|
|
return None;
|
|
}
|
|
let mut lb = self.lower_bound;
|
|
let mut ub = self.upper_bound;
|
|
let current_min_z = self.sub_chunk_min_z;
|
|
lb.z -= current_min_z;
|
|
ub.z -= current_min_z;
|
|
ub.z = std::cmp::min(ub.z, SubChunkSize::<S>::SIZE.z as i32);
|
|
self.sub_chunk_min_z += SubChunkSize::<S>::SIZE.z as i32;
|
|
self.lower_bound.z = self.sub_chunk_min_z;
|
|
Some((current_min_z, lb, ub))
|
|
}
|
|
}
|
|
|
|
/// Iterator over all positions in a bounded region of a [`Chonk`].
pub struct ChonkPosIter<V: Vox, S: RectVolSize, M: Clone> {
    // Produces per-slab bounds.
    outer: ChonkIterHelper<V, S, M>,
    // Current slab's (global min z, position iterator), if one is active.
    opt_inner: Option<(i32, ChunkPosIter<V, SubChunkSize<S>, M>)>,
}
|
|
|
|
impl<V: Vox, S: RectVolSize, M: Clone> Iterator for ChonkPosIter<V, S, M> {
|
|
type Item = Vec3<i32>;
|
|
|
|
#[inline(always)]
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
loop {
|
|
if let Some((sub_chunk_min_z, ref mut inner)) = self.opt_inner {
|
|
if let Some(mut pos) = inner.next() {
|
|
pos.z += sub_chunk_min_z;
|
|
return Some(pos);
|
|
}
|
|
}
|
|
match self.outer.next() {
|
|
None => return None,
|
|
Some((sub_chunk_min_z, lb, ub)) => {
|
|
self.opt_inner = Some((sub_chunk_min_z, SubChunk::<V, S, M>::pos_iter(lb, ub)))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Per-slab iterator used by ChonkVolIter: `Vol` walks an allocated
// sub-chunk's voxels directly, while `Pos` only generates positions for slabs
// outside the allocated range (whose voxels are the uniform below/above fill).
enum InnerChonkVolIter<'a, V: Vox, S: RectVolSize, M: Clone> {
    Vol(ChunkVolIter<'a, V, SubChunkSize<S>, M>),
    Pos(ChunkPosIter<V, SubChunkSize<S>, M>),
}
|
|
|
|
/// Iterator over `(position, &voxel)` pairs in a bounded region of a [`Chonk`].
pub struct ChonkVolIter<'a, V: Vox, S: RectVolSize, M: Clone> {
    // Borrowed chonk, needed to resolve below/above fill voxels per slab.
    chonk: &'a Chonk<V, S, M>,
    // Produces per-slab bounds.
    outer: ChonkIterHelper<V, S, M>,
    // Current slab's (global min z, inner iterator), if one is active.
    opt_inner: Option<(i32, InnerChonkVolIter<'a, V, S, M>)>,
}
|
|
|
|
impl<'a, V: Vox, S: RectVolSize, M: Clone> Iterator for ChonkVolIter<'a, V, S, M> {
    type Item = (Vec3<i32>, &'a V);

    /// Yields `(global position, voxel reference)` pairs slab by slab. Slabs
    /// outside the allocated sub-chunk range yield the uniform `below`/`above`
    /// fill voxel; allocated slabs delegate to the sub-chunk's own iterator.
    #[inline(always)]
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Drain the active slab iterator first, if there is one.
            if let Some((sub_chunk_min_z, ref mut inner)) = self.opt_inner {
                let got = match inner {
                    // Allocated slab: the sub-chunk yields (local pos, voxel).
                    InnerChonkVolIter::<'a, V, S, M>::Vol(iter) => iter.next(),
                    // Unallocated slab: positions only; pair each with the
                    // appropriate implicit fill voxel.
                    InnerChonkVolIter::<'a, V, S, M>::Pos(iter) => iter.next().map(|pos| {
                        if sub_chunk_min_z < self.chonk.get_min_z() {
                            (pos, &self.chonk.below)
                        } else {
                            (pos, &self.chonk.above)
                        }
                    }),
                };
                if let Some((mut pos, vox)) = got {
                    // Translate slab-local z back into the global frame.
                    pos.z += sub_chunk_min_z;
                    return Some((pos, vox));
                }
            }
            // No active iterator or it is exhausted: start the next slab, or
            // finish once all slabs have been consumed.
            match self.outer.next() {
                None => return None,
                Some((sub_chunk_min_z, lb, ub)) => {
                    // Pick the inner iterator kind: position-only for slabs
                    // outside the allocated range, full voxel iteration inside.
                    let inner = if sub_chunk_min_z < self.chonk.get_min_z()
                        || sub_chunk_min_z >= self.chonk.get_max_z()
                    {
                        InnerChonkVolIter::<'a, V, S, M>::Pos(SubChunk::<V, S, M>::pos_iter(lb, ub))
                    } else {
                        InnerChonkVolIter::<'a, V, S, M>::Vol(
                            self.chonk.sub_chunks
                                [self.chonk.sub_chunk_idx(sub_chunk_min_z) as usize]
                                .vol_iter(lb, ub),
                        )
                    };
                    self.opt_inner = Some((sub_chunk_min_z, inner));
                }
            }
        }
    }
}
|
|
|
|
impl<'a, V: Vox, S: RectVolSize, M: Clone> IntoPosIterator for &'a Chonk<V, S, M> {
|
|
type IntoIter = ChonkPosIter<V, S, M>;
|
|
|
|
fn pos_iter(self, lower_bound: Vec3<i32>, upper_bound: Vec3<i32>) -> Self::IntoIter {
|
|
Self::IntoIter {
|
|
outer: ChonkIterHelper::<V, S, M> {
|
|
sub_chunk_min_z: self.sub_chunk_min_z(lower_bound.z),
|
|
lower_bound,
|
|
upper_bound,
|
|
phantom: PhantomData,
|
|
},
|
|
opt_inner: None,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a, V: Vox, S: RectVolSize, M: Clone> IntoVolIterator<'a> for &'a Chonk<V, S, M> {
|
|
type IntoIter = ChonkVolIter<'a, V, S, M>;
|
|
|
|
fn vol_iter(self, lower_bound: Vec3<i32>, upper_bound: Vec3<i32>) -> Self::IntoIter {
|
|
Self::IntoIter {
|
|
chonk: self,
|
|
outer: ChonkIterHelper::<V, S, M> {
|
|
sub_chunk_min_z: self.sub_chunk_min_z(lower_bound.z),
|
|
lower_bound,
|
|
upper_bound,
|
|
phantom: PhantomData,
|
|
},
|
|
opt_inner: None,
|
|
}
|
|
}
|
|
}
|