From 9ee0cd82d05eb05b244bbf23cb33d30d3f7aa1fc Mon Sep 17 00:00:00 2001 From: Joshua Yanovski <pythonesque@gmail.com> Date: Mon, 13 Jan 2020 05:12:56 +0100 Subject: [PATCH] Code restructuring for performance. Turned a lot of for loops into for_each loops, which should be easier for LLVM to optimize currently. Also updated almost all the non-erosion stuff in WorldGen to run in parallel (and take advantage of the cache, in the case of TownGen), and hopefully improved performance somewhat for chunk generation as well. --- world/Cargo.toml | 2 +- world/src/block/mod.rs | 94 ++++++++-------- world/src/block/natural.rs | 5 +- world/src/column/mod.rs | 9 +- world/src/generator/town/mod.rs | 16 ++- world/src/lib.rs | 14 +-- world/src/sim/erosion.rs | 194 ++++++++++++++++---------------- world/src/sim/mod.rs | 116 +++++++++++-------- world/src/util/random.rs | 1 + world/src/util/structure.rs | 130 ++++++++++++++++++--- 10 files changed, 351 insertions(+), 230 deletions(-) diff --git a/world/Cargo.toml b/world/Cargo.toml index 978be53dfb..7152078ac6 100644 --- a/world/Cargo.toml +++ b/world/Cargo.toml @@ -13,7 +13,7 @@ vek = "0.9.9" noise = { version = "0.6.0", default-features = false } num = "0.2.0" ordered-float = "1.0" -hashbrown = { version = "0.6.2", features = ["serde"] } +hashbrown = { version = "0.6.2", features = ["rayon", "serde"] } lazy_static = "1.4.0" log = "0.4.8" rand = "0.7.2" diff --git a/world/src/block/mod.rs b/world/src/block/mod.rs index 83d6553ff7..9ba202dfed 100644 --- a/world/src/block/mod.rs +++ b/world/src/block/mod.rs @@ -3,8 +3,9 @@ mod natural; use crate::{ column::{ColumnGen, ColumnSample}, generator::{Generator, TownGen}, + sim::WorldSim, util::{RandomField, Sampler, SmallCache}, - World, CONFIG, + CONFIG, }; use common::{ terrain::{structure::StructureBlock, Block, BlockKind, Structure}, @@ -15,28 +16,26 @@ use std::ops::{Add, Div, Mul, Neg}; use vek::*; pub struct BlockGen<'a> { - world: &'a World, - column_cache: SmallCache<Option<ColumnSample<'a>>>, - column_gen: ColumnGen<'a>, + pub column_cache: SmallCache<Option<ColumnSample<'a>>>, + pub column_gen: ColumnGen<'a>, } impl<'a> BlockGen<'a> { - pub fn new(world: &'a World, column_gen: ColumnGen<'a>) -> Self { + pub fn new(column_gen: ColumnGen<'a>) -> Self { Self { - world, column_cache: SmallCache::default(), column_gen, } } - fn sample_column( + pub fn sample_column<'b>( column_gen: &ColumnGen<'a>, - cache: &mut SmallCache<Option<ColumnSample<'a>>>, + cache: &'b mut SmallCache<Option<ColumnSample<'a>>>, wpos: Vec2<i32>, - ) -> Option<ColumnSample<'a>> { + ) -> Option<&'b ColumnSample<'a>> { cache .get(Vec2::from(wpos), |wpos| column_gen.get(wpos)) - .clone() + .as_ref() } fn get_cliff_height( @@ -91,7 +90,6 @@ impl<'a> BlockGen<'a> { pub fn get_z_cache(&mut self, wpos: Vec2<i32>) -> Option<ZCache<'a>> { let BlockGen { - world: _, column_cache, column_gen, } = self; @@ -100,40 +98,37 @@ impl<'a> BlockGen<'a> { let sample = column_gen.get(wpos)?; // Tree samples - let mut structure_samples = [None, None, None, None, None, None, None, None, None]; - for i in 0..structure_samples.len() { - if let Some(st) = sample.close_structures[i] { - let st_sample = Self::sample_column(column_gen, column_cache, Vec2::from(st.pos)); - structure_samples[i] = st_sample; - } - } - let mut structures = [None, None, None, None, None, None, None, None, None]; - for i in 0..structures.len() { - if let (Some(st), Some(st_sample)) = - (sample.close_structures[i], structure_samples[i].clone()) - { - let st_info = match st.meta { - None => 
natural::structure_gen( - column_gen, - column_cache, - i, - st.pos, - st.seed, - &structure_samples, - ), - Some(meta) => Some(StructureInfo { - pos: Vec3::from(st.pos) + Vec3::unit_z() * st_sample.alt as i32, - seed: st.seed, - meta, - }), - }; - - if let Some(st_info) = st_info { - structures[i] = Some((st_info, st_sample)); + sample + .close_structures + .iter() + .zip(structures.iter_mut()) + .for_each(|(close_structure, structure)| { + if let Some(st) = *close_structure { + let st_sample = + Self::sample_column(column_gen, column_cache, Vec2::from(st.pos)); + if let Some(st_sample) = st_sample { + let st_sample = st_sample.clone(); + let st_info = match st.meta { + None => natural::structure_gen( + column_gen, + column_cache, + st.pos, + st.seed, + &st_sample, + ), + Some(meta) => Some(StructureInfo { + pos: Vec3::from(st.pos) + Vec3::unit_z() * st_sample.alt as i32, + seed: st.seed, + meta, + }), + }; + if let Some(st_info) = st_info { + *structure = Some((st_info, st_sample)); + } + } } - } - } + }); Some(ZCache { wpos, @@ -149,10 +144,10 @@ impl<'a> BlockGen<'a> { only_structures: bool, ) -> Option<Block> { let BlockGen { - world, column_cache, column_gen, } = self; + let world = column_gen.sim; let sample = &z_cache?.sample; let &ColumnSample { @@ -193,7 +188,6 @@ impl<'a> BlockGen<'a> { } else { // Apply warping let warp = world - .sim() .gen_ctx .warp_nz .get(wposf.div(24.0)) @@ -208,8 +202,8 @@ impl<'a> BlockGen<'a> { (surface_height, false) } else { let turb = Vec2::new( - world.sim().gen_ctx.fast_turb_x_nz.get(wposf.div(25.0)) as f32, - world.sim().gen_ctx.fast_turb_y_nz.get(wposf.div(25.0)) as f32, + world.gen_ctx.fast_turb_x_nz.get(wposf.div(25.0)) as f32, + world.gen_ctx.fast_turb_y_nz.get(wposf.div(25.0)) as f32, ) * 8.0; // let turb = Lerp::lerp(0.0, turb, warp_factor); @@ -352,9 +346,9 @@ impl<'a> BlockGen<'a> { .or_else(|| { // Rocks if (height + 2.5 - wposf.z as f32).div(7.5).abs().powf(2.0) < rock { - let field0 = RandomField::new(world.sim().seed + 0); - let field1 = RandomField::new(world.sim().seed + 1); - let field2 = RandomField::new(world.sim().seed + 2); + let field0 = RandomField::new(world.seed + 0); + let field1 = RandomField::new(world.seed + 1); + let field2 = RandomField::new(world.seed + 2); Some(Block::new( BlockKind::Normal, diff --git a/world/src/block/natural.rs b/world/src/block/natural.rs index be162d8b54..46d1486d23 100644 --- a/world/src/block/natural.rs +++ b/world/src/block/natural.rs @@ -23,13 +23,10 @@ static QUIRKY_RAND: RandomPerm = RandomPerm::new(0xA634460F); pub fn structure_gen<'a>( column_gen: &ColumnGen<'a>, column_cache: &mut SmallCache<Option<ColumnSample<'a>>>, - idx: usize, st_pos: Vec2<i32>, st_seed: u32, - structure_samples: &[Option<ColumnSample>; 9], + st_sample: &ColumnSample, ) -> Option<StructureInfo> { - let st_sample = &structure_samples[idx].as_ref()?; - // Assuming it's a tree... figure out when it SHOULDN'T spawn let random_seed = (st_seed as f64) / (u32::MAX as f64); if (st_sample.tree_density as f64) < random_seed diff --git a/world/src/column/mod.rs b/world/src/column/mod.rs index 0a2f859572..33b323037f 100644 --- a/world/src/column/mod.rs +++ b/world/src/column/mod.rs @@ -476,13 +476,13 @@ impl<'a> Sampler<'a> for ColumnGen<'a> { // this point towards the altitude of the river. Specifically, as the dist goes from // TerrainChunkSize::RECT_SIZE.x to 0, the weighted altitude of this point should go from // alt to river_alt. 
- for (river_chunk_idx, river_chunk, river, dist) in neighbor_river_data { + neighbor_river_data.for_each(|(river_chunk_idx, river_chunk, river, dist)| { match river.river_kind { Some(kind) => { if kind.is_river() && !dist.is_some() { // Ostensibly near a river segment, but not "usefully" so (there is no // closest point between t = 0.0 and t = 1.0). - continue; + return; } else { let river_dist = dist.map(|(_, dist, _, (river_t, _, downhill_river))| { let downhill_height = if kind.is_river() { @@ -537,7 +537,7 @@ impl<'a> Sampler<'a> for ColumnGen<'a> { let river_dist = dist.y; if !(dist.x == 0.0 && river_dist < scale_factor) { - continue; + return; } // We basically want to project outwards from river_pos, along the current // tangent line, to chunks <= river_width * 1.0 away from this @@ -564,7 +564,8 @@ impl<'a> Sampler<'a> for ColumnGen<'a> { } None => {} } - } + }); + let river_scale_factor = if river_count == 0.0 { 1.0 } else { diff --git a/world/src/generator/town/mod.rs b/world/src/generator/town/mod.rs index 476d46595c..8a44eff32b 100644 --- a/world/src/generator/town/mod.rs +++ b/world/src/generator/town/mod.rs @@ -3,7 +3,7 @@ mod vol; use super::{Generator, SpawnRules}; use crate::{ - block::block_from_structure, + block::{block_from_structure, BlockGen}, column::{ColumnGen, ColumnSample}, util::Sampler, CONFIG, @@ -123,23 +123,27 @@ pub struct TownState { } impl TownState { - pub fn generate(center: Vec2<i32>, gen: &mut ColumnGen, rng: &mut impl Rng) -> Option<Self> { + pub fn generate(center: Vec2<i32>, gen: &mut BlockGen, rng: &mut impl Rng) -> Option<Self> { let radius = rng.gen_range(18, 20) * 9; let size = Vec2::broadcast(radius * 2 / 9 - 2); - if gen.get(center).map(|sample| sample.chaos).unwrap_or(0.0) > 0.35 - || gen.get(center).map(|sample| sample.alt).unwrap_or(0.0) < CONFIG.sea_level + 10.0 + let center_col = BlockGen::sample_column(&gen.column_gen, &mut gen.column_cache, center); + + if center_col.map(|sample| sample.chaos).unwrap_or(0.0) > 0.35 + || center_col.map(|sample| sample.alt).unwrap_or(0.0) < CONFIG.sea_level + 10.0 { return None; } - let alt = gen.get(center).map(|sample| sample.alt).unwrap_or(0.0) as i32; + let alt = center_col.map(|sample| sample.alt).unwrap_or(0.0) as i32; let mut vol = TownVol::generate_from( size, |pos| { let wpos = center + (pos - size / 2) * CELL_SIZE + CELL_SIZE / 2; - let rel_alt = gen.get(wpos).map(|sample| sample.alt).unwrap_or(0.0) as i32 + let rel_alt = BlockGen::sample_column(&gen.column_gen, &mut gen.column_cache, wpos) + .map(|sample| sample.alt) + .unwrap_or(0.0) as i32 + CELL_HEIGHT / 2 - alt; diff --git a/world/src/lib.rs b/world/src/lib.rs index 967111076b..3a7579fd47 100644 --- a/world/src/lib.rs +++ b/world/src/lib.rs @@ -57,7 +57,7 @@ impl World { } pub fn sample_blocks(&self) -> BlockGen { - BlockGen::new(self, ColumnGen::new(&self.sim)) + BlockGen::new(ColumnGen::new(&self.sim)) } pub fn generate_chunk( @@ -101,8 +101,8 @@ impl World { let chunk_block_pos = Vec3::from(chunk_pos) * TerrainChunkSize::RECT_SIZE.map(|e| e as i32); let mut chunk = TerrainChunk::new(base_z, stone, air, meta); - for x in 0..TerrainChunkSize::RECT_SIZE.x as i32 { - for y in 0..TerrainChunkSize::RECT_SIZE.y as i32 { + for y in 0..TerrainChunkSize::RECT_SIZE.y as i32 { + for x in 0..TerrainChunkSize::RECT_SIZE.x as i32 { if should_continue() { return Err(()); }; @@ -116,11 +116,11 @@ impl World { let (min_z, only_structures_min_z, max_z) = z_cache.get_z_limits(&mut sampler); - for z in base_z..min_z as i32 { + (base_z..min_z as 
i32).for_each(|z| { let _ = chunk.set(Vec3::new(x, y, z), stone); - } + }); - for z in min_z as i32..max_z as i32 { + (min_z as i32..max_z as i32).for_each(|z| { let lpos = Vec3::new(x, y, z); let wpos = chunk_block_pos + lpos; let only_structures = lpos.z >= only_structures_min_z as i32; @@ -130,7 +130,7 @@ impl World { { let _ = chunk.set(lpos, block); } - } + }); } } diff --git a/world/src/sim/erosion.rs b/world/src/sim/erosion.rs index 2563e06a0e..23eab8db95 100644 --- a/world/src/sim/erosion.rs +++ b/world/src/sim/erosion.rs @@ -40,13 +40,13 @@ pub fn get_drainage(newh: &[u32], downhill: &[isize], _boundary_len: usize) -> B // let base_flux = 1.0 / ((WORLD_SIZE.x * WORLD_SIZE.y) as f32); let base_flux = 1.0; let mut flux = vec![base_flux; WORLD_SIZE.x * WORLD_SIZE.y].into_boxed_slice(); - for &chunk_idx in newh.into_iter().rev() { + newh.into_iter().rev().for_each(|&chunk_idx| { let chunk_idx = chunk_idx as usize; let downhill_idx = downhill[chunk_idx]; if downhill_idx >= 0 { flux[downhill_idx as usize] += flux[chunk_idx]; } - } + }); flux } @@ -68,18 +68,18 @@ pub fn get_multi_drainage( // let base_flux = 1.0 / ((WORLD_SIZE.x * WORLD_SIZE.y) as f32); let base_area = 1.0; let mut area = vec![base_area; WORLD_SIZE.x * WORLD_SIZE.y].into_boxed_slice(); - for &ij in mstack { + mstack.into_iter().for_each(|&ij| { let ij = ij as usize; let wrec_ij = &mwrec[ij]; let area_ij = area[ij]; // debug_assert!(area_ij >= 0.0); - for (k, ijr) in mrec_downhill(mrec, ij) { + mrec_downhill(mrec, ij).for_each(|(k, ijr)| { /* if area_ij != 1.0 { println!("ij={:?} wrec_ij={:?} k={:?} ijr={:?} area_ij={:?} wrec_ij={:?}", ij, wrec_ij, k, ijr, area_ij, wrec_ij[k]); } */ area[ijr] += area_ij * /*wrec_ij.extract(k);*/wrec_ij[k]; - } - } + }); + }); area /* a=dx*dy*precip @@ -218,14 +218,14 @@ pub fn get_rivers<F: fmt::Debug + Float + Into<f64>, G: Float + Into<f64>>( // NOTE: This technically makes us discontinuous, so we should be cautious about using this. let derivative_divisor = 1.0; let height_scale = 1.0; // 1.0 / CONFIG.mountain_scale as f64; - for &chunk_idx in newh.into_iter().rev() { + newh.into_iter().rev().for_each(|&chunk_idx| { let chunk_idx = chunk_idx as usize; let downhill_idx = downhill[chunk_idx]; if downhill_idx < 0 { // We are in the ocean. debug_assert!(downhill_idx == -2); rivers[chunk_idx].river_kind = Some(RiverKind::Ocean); - continue; + return; } let downhill_idx = downhill_idx as usize; let downhill_pos = uniform_idx_as_vec2(downhill_idx); @@ -405,7 +405,7 @@ pub fn get_rivers<F: fmt::Debug + Float + Into<f64>, G: Float + Into<f64>>( neighbor_pass_pos: neighbor_pass_pos * TerrainChunkSize::RECT_SIZE.map(|e| e as i32), }); - continue; + return; } // Otherwise, we must be a river. 
} else { @@ -541,7 +541,7 @@ pub fn get_rivers<F: fmt::Debug + Float + Into<f64>, G: Float + Into<f64>>( } else { None }; - } + }); rivers } @@ -922,7 +922,7 @@ fn erode( let m = m_f(posi) as f64; let mwrec_i = &mwrec[posi]; - for (kk, posj) in mrec_downhill(&mrec, posi) { + mrec_downhill(&mrec, posi).for_each(|(kk, posj)| { // let posj = posj as usize; let dxy = (uniform_idx_as_vec2(posi) - uniform_idx_as_vec2(posj)).map(|e| e as f64); let neighbor_distance = (neighbor_coef * dxy).magnitude(); @@ -930,7 +930,7 @@ fn erode( // let knew = (k * (p * chunk_area * (area[posi] as f64 * mwrec_i.extract(kk) as f64)).powf(m) / neighbor_distance.powf(n)) as Compute; k_tot[kk] = knew; // k_tot = k_tot.replace(kk, knew); - } + }); k_tot } }) @@ -984,7 +984,7 @@ fn erode( // let k = k_df * dt; let mwrec_i = &mwrec[posi]; - for (kk, posj) in mrec_downhill(&mrec, posi) { + mrec_downhill(&mrec, posi).for_each(|(kk, posj)| { let mwrec_kk = mwrec_i[kk]; // let posj = posj as usize; @@ -1048,7 +1048,7 @@ fn erode( // let knew = 0.0; k_tot[kk] = knew; // k_tot = k_tot.replace(kk, knew); - } + }); k_tot } }) @@ -1145,7 +1145,7 @@ fn erode( { // guess/update the elevation at t+Δt (k) h_p.par_iter_mut() - .zip(/*h_*/ h.par_iter()) /*.zip(wh.par_iter())*/ + .zip_eq(/*h_*/ h.par_iter()) /*.zip(wh.par_iter())*/ .for_each(|((mut h_p, h_)/*, wh_*/)| { *h_p = (*h_)/*.max(*wh_)*/ as Compute; }); @@ -1205,7 +1205,8 @@ fn erode( // // After: // deltah_i = Σ{j ∈ {i} ∪ upstream_i(t)}(h_j(t, FINAL) + U_j * Δt - h_i(t + Δt, k)) - for &posi in /*newh.iter().rev()*/mstack.into_iter() { + /*newh.iter().rev()*/ + mstack.into_iter().for_each(|&posi| { let posi = posi as usize; let posj = dh[posi]; if posj < 0 { @@ -1245,9 +1246,9 @@ fn erode( } deltah[posi] += (/* ht[posi] + at[posi].max(0.0) */h_t[posi] + uplift_i) as Compute - h_p[posi]; let mwrec_i = &mwrec[posi]; - for (k, posj) in mrec_downhill(&mrec, posi) { + mrec_downhill(&mrec, posi).for_each(|(k, posj)| { deltah[posj] += deltah[posi] * mwrec_i[k]/*mwrec_i.extract(k)*/; - } + }); /* let posj = posj as usize; deltah[posj] += deltah[posi]; */ @@ -1263,7 +1264,7 @@ fn erode( ht[posi], at[posi]); debug_assert_eq!(deltah[posi], deltah_sediment[posi] + deltah_alluvium[posi]); } */ - } + }); log::debug!("(Done sediment transport computation)."); // do ij=nn,1,-1 // ijk=stack(ij) @@ -1348,7 +1349,8 @@ fn erode( let mut simd_buf = [0.0; 8]; let mut simd_buf2 = [0.0; 8]; let mut simd_buf3 = [0.0; 8]; - for &posi in /*&*newh*/mstack.into_iter().rev() { + /*&*newh*/ + mstack.into_iter().rev().for_each(|&posi| { let posi = posi as usize; let old_elev_i = /*h*/elev[posi] as f64; let old_wh_i = wh[posi]; @@ -1470,7 +1472,7 @@ fn erode( if /*n == 1.0*/(n - 1.0).abs() <= 1.0e-3/*f64::EPSILON*/ && (q_ - 1.0).abs() <= 1.0e-3 { let mut f = h0; let mut df = 1.0; - for (kk, posj) in mrec_downhill(&mrec, posi) { + mrec_downhill(&mrec, posi).for_each(|(kk, posj)| { // This can happen in cases where receiver kk is neither uphill of // nor downhill from posi's direct receiver. 
if /*new_ht_i/*old_ht_i*/ >= (h_t[posj]/* + uplift(posj) as f64*/)*/old_elev_i /*>=*/> wh[posj] as f64/*h[posj]*//*h_j*/ { @@ -1484,7 +1486,7 @@ fn erode( f += fact * elev_j; df += fact; } - } + }); new_h_i = f / df; } else { // Local Newton-Raphson @@ -1506,7 +1508,7 @@ fn erode( // let czero = f64s(0.0);//f64x8::splat(0.0); */ let mut rec_heights = [0.0; 8];//f64s(0.0);//f64x8::splat(0.0); let mut mask = [MaskType::new(false); 8]; - for (kk, posj) in mrec_downhill(&mrec, posi) { + mrec_downhill(&mrec, posi).for_each(|(kk, posj)| { if old_elev_i > wh[posj] as f64 { // k_fs_weights[kk] = k_fs_fact[kk] as SimdType; // k_fs_weights[max] = k_fs_fact[kk] as SimdType; @@ -1520,7 +1522,7 @@ fn erode( // /*rec_heights = */rec_heights.replace(max, h[posj] as f64); // max += 1; } - } + }); /* let (weights_heights, max) = { let mut iter = mrec_downhill(&mrec, posi); let mut max = 0usize; @@ -1664,7 +1666,7 @@ fn erode( df += (n as SimdType * dh_fs_sample + q_ as SimdType * dh_df_sample) as f64; } } */ - for kk in (0..8) { + (0..8).for_each(|kk| { //if /*new_ht_i/*old_ht_i*/ > (h_t[posj]/* + uplift(posj) as f64*/)*/old_elev_i /*>=*/> wh[posj] as f64/*h[posj]*//*h_j*/ { if mask[kk].test() { let h_j = rec_heights[kk]; @@ -1679,7 +1681,7 @@ fn erode( df += (n as SimdType * dh_fs_sample + q_ as SimdType * dh_df_sample) as f64; } //} - } + }); /* for (kk, posj) in mrec_downhill(&mrec, posi) { if /*new_ht_i/*old_ht_i*/ > (h_t[posj]/* + uplift(posj) as f64*/)*/old_elev_i /*>=*/> wh[posj] as f64/*h[posj]*//*h_j*/ { let h_j = h[posj] as f64; @@ -1979,7 +1981,7 @@ fn erode( } */ // Add sum of squares of errors. sum_err += (h[posi]/*.max(wh[posi])*/ as Compute/* + a[posi].max(0.0) as Compute*/ - /*hp*/h_p[posi] as Compute/* - ap[posi].max(0.0) as Compute*/).powi(2); - } + }); log::debug!( "(Done erosion computation, time={:?}ms)", start_time.elapsed().as_millis() @@ -2047,7 +2049,7 @@ fn erode( } */ // TODO: Consider taking advantage of multi-receiver flow here. b.par_iter_mut() - .zip(h.par_iter()) + .zip_eq(h.par_iter()) .enumerate() .for_each(|(posi, (mut b, &h_i))| { let old_b_i = *b; @@ -2174,7 +2176,7 @@ fn erode( ntherm = 0usize; prods_therm = 0.0; prodscrit_therm = 0.0; - for &posi in &*newh { + newh.iter().for_each(|&posi| { let posi = posi as usize; let old_h_i = h/*b*/[posi] as f64; // let old_a_i = a[posi]; @@ -2351,7 +2353,7 @@ fn erode( } maxh = h_i.max(maxh); } - } + }); log::debug!( "Done applying thermal erosion (max height: {:?}) (avg height: {:?}) (min height: {:?}) (avg slope: {:?})\n \ (above talus angle, geom. mean slope [actual/critical/ratio]: {:?} / {:?} / {:?})\n \ @@ -2438,11 +2440,11 @@ pub fn fill_sinks<F: Float + Send + Sync>( loop { let mut changed = false; - for posi in 0..newh.len() { + (0..newh.len()).for_each(|posi| { let nh = newh[posi]; let oh = oldh[posi]; if nh == oh { - continue; + return; } for nposi in neighbors(posi) { let onbh = newh[nposi]; @@ -2471,7 +2473,7 @@ pub fn fill_sinks<F: Float + Send + Sync>( changed = true; } } - } + }); if !changed { return newh; } @@ -2520,44 +2522,44 @@ pub fn get_lakes<F: Float>( let mut indirection_ = vec![0u32; indirection.len()]; // First, find all the lakes. let mut lake_roots = Vec::with_capacity(downhill.len()); // Test - for (chunk_idx, &dh) in (&*downhill) + (&*downhill) .into_iter() .enumerate() .filter(|(_, &dh_idx)| dh_idx < 0) - { - if dh == -2 { - // On the boundary, add to the boundary vector. - boundary.push(chunk_idx); - // Still considered a lake root, though. 
- } else if dh == -1 { - lake_roots.push(chunk_idx); - } else { - panic!("Impossible."); - } - // Find all the nodes uphill from this lake. Since there is only one outgoing edge - // in the "downhill" graph, this is guaranteed never to visit a node more than - // once. - let start = newh.len(); - let indirection_idx = (start * 8) as u32; - // New lake root - newh.push(chunk_idx as u32); - let mut cur = start; - while cur < newh.len() { - let node = newh[cur as usize]; - for child in uphill(downhill, node as usize) { - // lake_idx is the index of our lake root. - indirection[child] = chunk_idx as i32; - indirection_[child] = indirection_idx; - newh.push(child as u32); + .for_each(|(chunk_idx, &dh)| { + if dh == -2 { + // On the boundary, add to the boundary vector. + boundary.push(chunk_idx); + // Still considered a lake root, though. + } else if dh == -1 { + lake_roots.push(chunk_idx); + } else { + panic!("Impossible."); } - cur += 1; - } - // Find the number of elements pushed. - let length = (cur - start) * 8; - // New lake root (lakes have indirection set to -length). - indirection[chunk_idx] = -(length as i32); - indirection_[chunk_idx] = indirection_idx; - } + // Find all the nodes uphill from this lake. Since there is only one outgoing edge + // in the "downhill" graph, this is guaranteed never to visit a node more than + // once. + let start = newh.len(); + let indirection_idx = (start * 8) as u32; + // New lake root + newh.push(chunk_idx as u32); + let mut cur = start; + while cur < newh.len() { + let node = newh[cur as usize]; + uphill(downhill, node as usize).for_each(|child| { + // lake_idx is the index of our lake root. + indirection[child] = chunk_idx as i32; + indirection_[child] = indirection_idx; + newh.push(child as u32); + }); + cur += 1; + } + // Find the number of elements pushed. + let length = (cur - start) * 8; + // New lake root (lakes have indirection set to -length). + indirection[chunk_idx] = -(length as i32); + indirection_[chunk_idx] = indirection_idx; + }); assert_eq!(newh.len(), downhill.len()); log::debug!("Old lake roots: {:?}", lake_roots.len()); @@ -2569,7 +2571,7 @@ pub fn get_lakes<F: Float>( // space, and augment our indirection vector with the starting index, resulting in a vector of // slices. As we go, we replace each lake entry with its index in the new indirection buffer, // allowing - for &chunk_idx_ in newh.into_iter() { + newh.into_iter().for_each(|&chunk_idx_| { let chunk_idx = chunk_idx_ as usize; let lake_idx_ = indirection_[chunk_idx]; let lake_idx = lake_idx_ as usize; @@ -2582,7 +2584,7 @@ pub fn get_lakes<F: Float>( // get its height. We do the same thing in our own lake's entry list. If the maximum // of the heights we get out from this process is greater than the maximum of this // chunk and its neighbor chunk, we switch to this new edge. - for neighbor_idx in neighbors(chunk_idx) { + neighbors(chunk_idx).for_each(|neighbor_idx| { let neighbor_height = h(neighbor_idx); let neighbor_lake_idx_ = indirection_[neighbor_idx]; let neighbor_lake_idx = neighbor_lake_idx_ as usize; @@ -2670,8 +2672,8 @@ pub fn get_lakes<F: Float>( } } } - } - } + }); + }); // Now it's time to calculate the lake connections graph T_L covering G_L. let mut candidates = BinaryHeap::with_capacity(indirection.len()); @@ -2679,11 +2681,11 @@ pub fn get_lakes<F: Float>( // We start by going through each pass, deleting the ones that point out of boundary nodes and // adding ones that point into boundary nodes from non-boundary nodes. 
- for edge in &mut lakes { + lakes.iter_mut().for_each(|edge| { let edge: &mut (i32, u32) = edge; // Only consider valid elements. if edge.0 == -1 { - continue; + return; } // Check to see if this edge points out *from* a boundary node. // Delete it if so. @@ -2696,7 +2698,7 @@ pub fn get_lakes<F: Float>( }; if downhill[lake_idx] == -2 { edge.0 = -1; - continue; + return; } // This edge is not pointing out from a boundary node. // Check to see if this edge points *to* a boundary node. @@ -2716,7 +2718,7 @@ pub fn get_lakes<F: Float>( (edge.0 as u32, edge.1), ))); } - } + }); let mut pass_flows_sorted: Vec<usize> = Vec::with_capacity(indirection.len()); @@ -2875,10 +2877,10 @@ pub fn get_lakes<F: Float>( let mut rcv_cost = -f64::INFINITY;/*f64::EPSILON;*/ let outflow_distance = (outflow_coords - coords).map(|e| e as f64); - for ineighbor in neighbors(node) { + neighbors(node).for_each(|ineighbor| { if indirection[ineighbor] != chunk_idx as i32 && ineighbor != chunk_idx && ineighbor != neighbor_pass_idx || h(ineighbor) > elev { - continue; + return; } let dxy = (uniform_idx_as_vec2(ineighbor) - coords).map(|e| e as f64); let neighbor_distance = (/*neighbor_coef * */dxy); @@ -2912,7 +2914,7 @@ pub fn get_lakes<F: Float>( }, _ => {} } - } + }); assert!(rcv != -1); downhill[node] = rcv; // dist2receivers(node) = d8_distances[detail::get_d8_distance_id(node, rcv, static_cast<Node_T>(ncols))]; @@ -2935,7 +2937,7 @@ pub fn get_lakes<F: Float>( // let node = newh[cur as usize]; // cur += 1; - for child in uphill(downhill, node as usize) { + uphill(downhill, node as usize).for_each(|child| { // lake_idx is the index of our lake root. // Check to make sure child (flowing into us) is in the same lake. if indirection[child] == chunk_idx as i32 || child == chunk_idx @@ -2946,7 +2948,7 @@ pub fn get_lakes<F: Float>( // assert!(h[child] >= h[node as usize]); newh.push(child as u32); } - } + }); if cur == newh.len() { break; @@ -3053,7 +3055,7 @@ pub fn get_multi_rec<F: fmt::Debug + Float + Sync + Into<Compute>>( // let deltah = F::epsilon() * 2 * maxh; // NumCast::from(nn); /* 1.0e-8 */ // let deltah : F = (1.0e-7 as Compute).into(); - for &ijk in newh { + newh.into_iter().for_each(|&ijk| { let ijk = ijk as usize; let h_i = h(ijk); let ijr = downhill[ijk]; @@ -3074,7 +3076,7 @@ pub fn get_multi_rec<F: fmt::Debug + Float + Sync + Into<Compute>>( } else { h_i }; - } + }); let mut wrec = Vec::<Computex8>::with_capacity(nn); let mut mrec = Vec::with_capacity(nn); @@ -3104,7 +3106,7 @@ pub fn get_multi_rec<F: fmt::Debug + Float + Sync + Into<Compute>>( .map(move |(k, pos)| (k, vec2_as_uniform_idx(pos))) }; - for (k, ijk) in neighbor_iter(ij) { + neighbor_iter(ij).for_each(|(k, ijk)| { let wh_ijk = wh[ijk]; // let h_ijk = h(ijk); if @@ -3119,7 +3121,7 @@ pub fn get_multi_rec<F: fmt::Debug + Float + Sync + Into<Compute>>( // Avoiding ambiguous cases. 
ndon_ij += 1; } - } + }); // let vis_ij = mrec_ij.count_ones(); (mrec_ij, (ndon_ij, ndon_ij)) }) @@ -3135,7 +3137,7 @@ pub fn get_multi_rec<F: fmt::Debug + Float + Sync + Into<Compute>>( let mut wrec = /*Computex8::splat(czero)*/[czero; 8]; let mut nrec = 0; // let mut wrec: [Compute; 8] = [czero, czero, czero, czero, czero, czero, czero, czero]; - for (k, ijk) in mrec_downhill(&mrec, ij) { + mrec_downhill(&mrec, ij).for_each(|(k, ijk)| { let lrec_ijk = ((uniform_idx_as_vec2(ijk) - uniform_idx_as_vec2(ij)) .map(|e| e as Compute) * dxdy) @@ -3146,7 +3148,7 @@ pub fn get_multi_rec<F: fmt::Debug + Float + Sync + Into<Compute>>( // wrec = wrec.replace(k, wrec_ijk); sumweight += wrec_ijk; nrec += 1; - } + }); // let (_, ndon_ij) = ndon[ij]; let slope = sumweight / (nrec as Compute).max(1.0); let p_mfd_exp = 0.5 + 0.6 * slope; @@ -3171,16 +3173,16 @@ pub fn get_multi_rec<F: fmt::Debug + Float + Sync + Into<Compute>>( debug_assert_eq!(sumweight, <Compute as One>::one()); } */ sumweight = czero; - for wrec_k in &mut wrec { + wrec.iter_mut().for_each(|wrec_k| { let wrec_ijk = wrec_k.powf(p_mfd_exp); sumweight += wrec_ijk; *wrec_k = wrec_ijk; - } + }); if sumweight > czero { // wrec /= sumweight; - for wrec_k in &mut wrec { + wrec.iter_mut().for_each(|wrec_k| { *wrec_k /= sumweight; - } + }); } wrec }) @@ -3192,7 +3194,7 @@ pub fn get_multi_rec<F: fmt::Debug + Float + Sync + Into<Compute>>( let mut parse = Vec::with_capacity(nn); // we go through the nodes - for ij in 0..nn { + (0..nn).for_each(|ij| { let (ndon_ij, _) = don_vis[ij]; // when we find a "summit" (ie a node that has no donors) // we parse it (put it in a stack called parse) @@ -3203,7 +3205,7 @@ pub fn get_multi_rec<F: fmt::Debug + Float + Sync + Into<Compute>>( while let Some(ijn) = parse.pop() { // we add the node to the stack stack.push(ijn as u32); - for (_, ijr) in mrec_downhill(&mrec, ijn) { + mrec_downhill(&mrec, ijn).for_each(|(_, ijr)| { let (_, ref mut vis_ijr) = don_vis[ijr]; if *vis_ijr >= 1 { *vis_ijr -= 1; @@ -3211,9 +3213,9 @@ pub fn get_multi_rec<F: fmt::Debug + Float + Sync + Into<Compute>>( parse.push(ijr); } } - } + }); } - } + }); assert_eq!(stack.len(), nn); stack @@ -3360,7 +3362,7 @@ pub fn do_erosion( let alpha = |posi: usize| alpha[posi]; // Hillslope diffusion coefficient for sediment. 
let mut is_done = bitbox![0; WORLD_SIZE.x * WORLD_SIZE.y]; - for i in 0..n_steps { + (0..n_steps).for_each(|i| { log::debug!("Erosion iteration #{:?}", i); erode( &mut h, @@ -3389,6 +3391,6 @@ pub fn do_erosion( alpha, |posi| is_ocean(posi), ); - } + }); (h, b /*, a*/) } diff --git a/world/src/sim/mod.rs b/world/src/sim/mod.rs index 4b2d220564..e42d605aa4 100644 --- a/world/src/sim/mod.rs +++ b/world/src/sim/mod.rs @@ -21,6 +21,7 @@ pub use self::util::{ use crate::{ all::ForestKind, + block::BlockGen, column::ColumnGen, generator::TownState, util::{seed_expan, FastNoise, RandomField, Sampler, StructureGen2d}, @@ -30,6 +31,7 @@ use common::{ terrain::{BiomeKind, TerrainChunkSize}, vol::RectVolSize, }; +use hashbrown::HashMap; use noise::{ BasicMulti, Billow, Fbm, HybridMulti, MultiFractal, NoiseFn, RangeFunction, RidgedMulti, Seedable, SuperSimplex, Worley, @@ -40,7 +42,6 @@ use rand_chacha::ChaChaRng; use rayon::prelude::*; use serde_derive::{Deserialize, Serialize}; use std::{ - collections::HashMap, f32, f64, fs::File, io::{BufReader, BufWriter}, @@ -1688,6 +1689,7 @@ impl WorldSim { /// Prepare the world for simulation pub fn seed_elements(&mut self) { let mut rng = self.rng.clone(); + let random_loc = RandomField::new(rng.gen()); let cell_size = 16; let grid_size = WORLD_SIZE / cell_size; @@ -1697,7 +1699,7 @@ impl WorldSim { let mut locations = Vec::new(); // Seed the world with some locations - for _ in 0..loc_count { + (0..loc_count).for_each(|_| { let cell_pos = Vec2::new( self.rng.gen::<usize>() % grid_size.x, self.rng.gen::<usize>() % grid_size.y, @@ -1710,7 +1712,7 @@ impl WorldSim { locations.push(Location::generate(wpos, &mut rng)); loc_grid[cell_pos.y * grid_size.x + cell_pos.x] = Some(locations.len() - 1); - } + }); // Find neighbours let mut loc_clone = locations @@ -1718,7 +1720,7 @@ impl WorldSim { .map(|l| l.center) .enumerate() .collect::<Vec<_>>(); - for i in 0..locations.len() { + (0..locations.len()).for_each(|i| { let pos = locations[i].center.map(|e| e as i64); loc_clone.sort_by_key(|(_, l)| l.map(|e| e as i64).distance_squared(pos)); @@ -1727,13 +1729,13 @@ impl WorldSim { locations[i].neighbours.insert(*j); locations[*j].neighbours.insert(i); }); - } + }); // Simulate invasion! 
let invasion_cycles = 25; - for _ in 0..invasion_cycles { - for i in 0..grid_size.x { - for j in 0..grid_size.y { + (0..invasion_cycles).for_each(|_| { + (0..grid_size.y).for_each(|j| { + (0..grid_size.x).for_each(|i| { if loc_grid[j * grid_size.x + i].is_none() { const R_COORDS: [i32; 5] = [-1, 0, 1, 0, -1]; let idx = self.rng.gen::<usize>() % 4; @@ -1745,15 +1747,20 @@ impl WorldSim { loc_grid.get(loc.y * grid_size.x + loc.x).cloned().flatten(); } } - } - } - } + }); + }); + }); // Place the locations onto the world let gen = StructureGen2d::new(self.seed, cell_size as u32, cell_size as u32 / 2); - for i in 0..WORLD_SIZE.x { - for j in 0..WORLD_SIZE.y { - let chunk_pos = Vec2::new(i as i32, j as i32); + + self.chunks + .par_iter_mut() + .enumerate() + .for_each(|(ij, chunk)| { + let chunk_pos = uniform_idx_as_vec2(ij); + let i = chunk_pos.x as usize; + let j = chunk_pos.y as usize; let block_pos = Vec2::new( chunk_pos.x * TerrainChunkSize::RECT_SIZE.x as i32, chunk_pos.y * TerrainChunkSize::RECT_SIZE.y as i32, @@ -1779,16 +1786,14 @@ impl WorldSim { let nearest_cell_pos = near[0].chunk_pos; if nearest_cell_pos.x >= 0 && nearest_cell_pos.y >= 0 { let nearest_cell_pos = nearest_cell_pos.map(|e| e as usize) / cell_size; - self.get_mut(chunk_pos).unwrap().location = loc_grid + chunk.location = loc_grid .get(nearest_cell_pos.y * grid_size.x + nearest_cell_pos.x) .cloned() .unwrap_or(None) .map(|loc_idx| LocationInfo { loc_idx, near }); let town_size = 200; - let in_town = self - .get(chunk_pos) - .unwrap() + let in_town = chunk .location .as_ref() .map(|l| { @@ -1799,46 +1804,61 @@ impl WorldSim { < town_size * town_size }) .unwrap_or(false); + if in_town { - self.get_mut(chunk_pos).unwrap().spawn_rate = 0.0; + chunk.spawn_rate = 0.0; } } - } - } + }); // Stage 2 - towns! 
- let mut maybe_towns = HashMap::new(); - for i in 0..WORLD_SIZE.x { - for j in 0..WORLD_SIZE.y { - let chunk_pos = Vec2::new(i as i32, j as i32); - let wpos = chunk_pos.map2(Vec2::from(TerrainChunkSize::RECT_SIZE), |e, sz: u32| { - e * sz as i32 + sz as i32 / 2 - }); + let chunk_idx_center = |e: Vec2<i32>| { + e.map2(TerrainChunkSize::RECT_SIZE, |e, sz: u32| { + e * sz as i32 + sz as i32 / 2 + }) + }; + let maybe_towns = self + .gen_ctx + .town_gen + .par_iter( + chunk_idx_center(Vec2::zero()), + chunk_idx_center(WORLD_SIZE.map(|e| e as i32)), + ) + .map_init( + || BlockGen::new(ColumnGen::new(self)), + |mut block_gen, (pos, seed)| { + let mut rng = ChaChaRng::from_seed(seed_expan::rng_state(seed)); + // println!("Town: {:?}", town); + TownState::generate(pos, &mut block_gen, &mut rng).map(|t| (pos, Arc::new(t))) + }, + ) + .filter_map(|x| x) + .collect::<HashMap<_, _>>(); - let near_towns = self.gen_ctx.town_gen.get(wpos); + let gen_ctx = &self.gen_ctx; + self.chunks + .par_iter_mut() + .enumerate() + .for_each(|(ij, chunk)| { + let chunk_pos = uniform_idx_as_vec2(ij); + let wpos = chunk_idx_center(chunk_pos); + + let near_towns = gen_ctx.town_gen.get(wpos); let town = near_towns .iter() .min_by_key(|(pos, _seed)| wpos.distance_squared(*pos)); - if let Some((pos, _)) = town { - let maybe_town = maybe_towns - .entry(*pos) - .or_insert_with(|| { - // println!("Town: {:?}", town); - TownState::generate(*pos, &mut ColumnGen::new(self), &mut rng) - .map(|t| Arc::new(t)) - }) - .as_mut() - // Only care if we're close to the town - .filter(|town| { - Vec2::from(town.center()).distance_squared(wpos) - < town.radius().add(64).pow(2) - }) - .cloned(); - self.get_mut(chunk_pos).unwrap().structures.town = maybe_town; - } - } - } + let maybe_town = town + .and_then(|(pos, _seed)| maybe_towns.get(pos)) + // Only care if we're close to the town + .filter(|town| { + Vec2::from(town.center()).distance_squared(wpos) + < town.radius().add(64).pow(2) + }) + .cloned(); + + chunk.structures.town = maybe_town; + }); self.rng = rng; self.locations = locations; diff --git a/world/src/util/random.rs b/world/src/util/random.rs index b90228ad17..3a2bb6750c 100644 --- a/world/src/util/random.rs +++ b/world/src/util/random.rs @@ -2,6 +2,7 @@ use super::seed_expan; use super::Sampler; use vek::*; +#[derive(Clone, Copy)] pub struct RandomField { seed: u32, } diff --git a/world/src/util/structure.rs b/world/src/util/structure.rs index 6c2e570c49..e7c002dbbd 100644 --- a/world/src/util/structure.rs +++ b/world/src/util/structure.rs @@ -1,4 +1,6 @@ use super::{RandomField, Sampler}; +use crate::block::BlockGen; +use rayon::prelude::*; use vek::*; pub struct StructureGen2d { @@ -9,6 +11,8 @@ pub struct StructureGen2d { seed_field: RandomField, } +pub type StructureField = (Vec2<i32>, u32); + impl StructureGen2d { pub fn new(seed: u32, freq: u32, spread: u32) -> Self { Self { @@ -19,32 +23,130 @@ impl StructureGen2d { seed_field: RandomField::new(seed + 2), } } + + #[inline] + fn sample_to_index_internal(freq: i32, pos: Vec2<i32>) -> Vec2<i32> { + pos.map(|e| e.div_euclid(freq)) + } + + #[inline] + pub fn sample_to_index(&self, pos: Vec2<i32>) -> Vec2<i32> { + Self::sample_to_index_internal(self.freq as i32, pos) + } + + #[inline] + fn freq_offset(freq: i32) -> i32 { + freq / 2 + } + + #[inline] + fn spread_mul(spread: u32) -> u32 { + spread * 2 + } + + #[inline] + fn index_to_sample_internal( + freq: i32, + freq_offset: i32, + spread: i32, + spread_mul: u32, + x_field: RandomField, + y_field: RandomField, + seed_field: 
RandomField, + index: Vec2<i32>, + ) -> StructureField { + let center = index * freq + freq_offset; + let pos = Vec3::from(center); + ( + center + + Vec2::new( + (x_field.get(pos) % spread_mul) as i32 - spread, + (y_field.get(pos) % spread_mul) as i32 - spread, + ), + seed_field.get(pos), + ) + } + + /// Note: Generates all possible closest samples for elements in the range of min to max, + /// *exclusive.* + pub fn par_iter( + &self, + min: Vec2<i32>, + max: Vec2<i32>, + ) -> impl ParallelIterator<Item = StructureField> { + let freq = self.freq; + let spread = self.spread; + let spread_mul = Self::spread_mul(spread); + assert!(spread * 2 == spread_mul); + assert!(spread_mul <= freq); + let spread = spread as i32; + let freq = freq as i32; + let freq_offset = Self::freq_offset(freq); + assert!(freq_offset * 2 == freq); + + let min_index = Self::sample_to_index_internal(freq, min) - 1; + let max_index = Self::sample_to_index_internal(freq, max) + 1; + assert!(min_index.x < max_index.x); + // NOTE: xlen > 0 + let xlen = (max_index.x - min_index.x) as u32; + assert!(min_index.y < max_index.y); + // NOTE: ylen > 0 + let ylen = (max_index.y - min_index.y) as u32; + // NOTE: Cannot fail, since every product of u32s fits in a u64. + let len = ylen as u64 * xlen as u64; + // NOTE: since iteration is *exclusive* for the initial range, it's fine that we don't go + // up to the maximum value. + // NOTE: we convert to usize first, and then iterate, because we want to make sure we get + // a properly indexed parallel iterator that can deal with the whole range at once. + let x_field = self.x_field; + let y_field = self.y_field; + let seed_field = self.seed_field; + (0..len).into_par_iter().map(move |xy| { + let index = min_index + Vec2::new((xy % xlen as u64) as i32, (xy / xlen as u64) as i32); + Self::index_to_sample_internal( + freq, + freq_offset, + spread, + spread_mul, + x_field, + y_field, + seed_field, + index, + ) + }) + } } impl Sampler<'static> for StructureGen2d { type Index = Vec2<i32>; - type Sample = [(Vec2<i32>, u32); 9]; + type Sample = [StructureField; 9]; fn get(&self, sample_pos: Self::Index) -> Self::Sample { let mut samples = [(Vec2::zero(), 0); 9]; - let sample_closest = sample_pos.map(|e| e - e.rem_euclid(self.freq as i32)); + let freq = self.freq; + let spread = self.spread; + let spread_mul = Self::spread_mul(spread); + let spread = spread as i32; + let freq = freq as i32; + let freq_offset = Self::freq_offset(freq); + + let sample_closest = Self::sample_to_index_internal(freq, sample_pos); for i in 0..3 { for j in 0..3 { - let center = sample_closest - + Vec2::new(i, j).map(|e| e as i32 - 1) * self.freq as i32 - + self.freq as i32 / 2; - samples[i * 3 + j] = ( - center - + Vec2::new( - (self.x_field.get(Vec3::from(center)) % (self.spread * 2)) as i32 - - self.spread as i32, - (self.y_field.get(Vec3::from(center)) % (self.spread * 2)) as i32 - - self.spread as i32, - ), - self.seed_field.get(Vec3::from(center)), + let index = sample_closest + Vec2::new(i as i32, j as i32) - 1; + let sample = Self::index_to_sample_internal( + freq, + freq_offset, + spread, + spread_mul, + self.x_field, + self.y_field, + self.seed_field, + index, ); + samples[i * 3 + j] = sample; } }
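// Illustrative sketch of the loop shape this patch moves toward (not taken from
// the diff above; the function names below are hypothetical and the only external
// assumption is the `rayon` crate already added as a dependency): an index-based
// `for` loop becomes an iterator `for_each`, which LLVM currently optimizes more
// readily, and the same closure body then drops unchanged into rayon's
// `par_iter_mut` for the data-parallel world-generation passes.
use rayon::prelude::*;

fn scale_heights(heights: &mut [f32], factor: f32) {
    // Sequential form: iterator `for_each` instead of `for i in 0..heights.len()`.
    heights.iter_mut().for_each(|h| *h *= factor);
}

fn scale_heights_par(heights: &mut [f32], factor: f32) {
    // Parallel form: only the iterator adaptor differs, the closure is identical.
    heights.par_iter_mut().for_each(|h| *h *= factor);
}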