Addressing PR issues.

2025-07-25 04:42:23 +00:00 · 2019-08-22 23:48:13 +02:00
parent ba1a146cd5
commit 2ff192021c
3 changed files with 170 additions and 159 deletions
--- a/world/src/lib.rs
+++ b/world/src/lib.rs
@ -1,6 +1,5 @@
 #![deny(unsafe_code)]
 #![feature(
-    box_syntax,
    const_generics,
    euclidean_division,
    bind_by_move_pattern_guards,
--- a/world/src/sim/mod.rs
+++ b/world/src/sim/mod.rs
@ -1,9 +1,13 @@
+mod util;
 mod location;
 mod settlement;

 // Reexports
 pub use self::location::Location;
 pub use self::settlement::Settlement;
+use self::util::{
+    cdf_irwin_hall, InverseCdf, uniform_idx_as_vec2, uniform_noise,
+};

 use crate::{
    all::ForestKind,
@ -27,161 +31,6 @@ use vek::*;

 pub const WORLD_SIZE: Vec2<usize> = Vec2 { x: 1024, y: 1024 };

-/// Computes the cumulative distribution function of the weighted sum of k independent,
-/// uniformly distributed random variables between 0 and 1.  For each variable i, we use weights[i]
-/// as the weight to give samples[i] (the weights should all be positive).
-///
-/// If the precondition is met, the distribution of the result of calling this function will be
-/// uniformly distributed while preserving the same information that was in the original average.
-///
-/// For N > 33 the function will no longer return correct results since we will overflow u32.
-///
-/// NOTE:
-///
-/// Per [1], the problem of determing the CDF of
-/// the sum of uniformly distributed random variables over *different* ranges is considerably more
-/// complicated than it is for the same-range case.  Fortunately, it also provides a reference to
-/// [2], which contains a complete derivation of an exact rule for the density function for
-/// this case.  The CDF is just the integral of the cumulative distribution function [3],
-/// which we use to convert this into a CDF formula.
-///
-/// This allows us to sum weighted, uniform, independent random variables.
-///
-/// At some point, we should probably contribute this back to stats-rs.
-///
-/// 1. https://www.r-bloggers.com/sums-of-random-variables/,
-/// 2. Sadooghi-Alvandi, S., A. Nematollahi, & R. Habibi, 2009.
-///    On the Distribution of the Sum of Independent Uniform Random Variables.
-///    Statistical Papers, 50, 171-175.
-/// 3. hhttps://en.wikipedia.org/wiki/Cumulative_distribution_function
-fn cdf_irwin_hall<const N: usize>(weights: &[f32; N], samples: [f32; N]) -> f32 {
-    // Let J_k = {(j_1, ... , j_k) : 1 ≤ j_1 < j_2 < ··· < j_k ≤ N }.
-    //
-    // Let A_N = Π{k = 1 to n}a_k.
-    //
-    // The density function for N ≥ 2 is:
-    //
-    //   1/(A_N * (N - 1)!) * (x^(N-1) + Σ{k = 1 to N}((-1)^k *
-    //   Σ{(j_1, ..., j_k) ∈ J_k}(max(0, x - Σ{l = 1 to k}(a_(j_l)))^(N - 1))))
-    //
-    // So the cumulative distribution function is its integral, i.e. (I think)
-    //
-    // 1/(product{k in A}(k) * N!) * (x^N + sum(k in 1 to N)((-1)^k *
-    // sum{j in Subsets[A, {k}]}(max(0, x - sum{l in j}(l))^N)))
-    //
-    // which is also equivalent to
-    //
-    //   (letting B_k = { a in Subsets[A, {k}] : sum {l in a} l }, B_(0,1) = 0 and
-    //            H_k = { i : 1 ≤ 1 ≤ N! / (k! * (N - k)!) })
-    //
-    //   1/(product{k in A}(k) * N!) * sum(k in 0 to N)((-1)^k *
-    //   sum{l in H_k}(max(0, x - B_(k,l))^N))
-    //
-    // We should be able to iterate through the whole power set
-    // instead, and figure out K by calling count_ones(), so we can compute the result in O(2^N)
-    // iterations.
-    let x: f32 = weights
-        .iter()
-        .zip(samples.iter())
-        .map(|(weight, sample)| weight * sample)
-        .sum();
-
-    let mut y = 0.0f32;
-    for subset in 0u32..(1 << N) {
-        // Number of set elements
-        let k = subset.count_ones();
-        // Add together exactly the set elements to get B_subset
-        let z = weights
-            .iter()
-            .enumerate()
-            .filter(|(i, _)| subset & (1 << i) as u32 != 0)
-            .map(|(_, k)| k)
-            .sum::<f32>();
-        // Compute max(0, x - B_subset)^N
-        let z = (x - z).max(0.0).powi(N as i32);
-        // The parity of k determines whether the sum is negated.
-        y += if k & 1 == 0 { z } else { -z };
-    }
-
-    // Divide by the product of the weights.
-    y /= weights.iter().product::<f32>();
-
-    // Remember to multiply by 1 / N! at the end.
-    y / (1..=N as i32).product::<i32>() as f32
-}
-
-/// First component of each element of the vector is the computed CDF of the noise function at this
-/// index (i.e. its position in a sorted list of value returned by the noise function applied to
-/// every chunk in the game).  Second component is the cached value of the noise function that
-/// generated the index.
-type InverseCdf = Box<[(f32, f32); WORLD_SIZE.x * WORLD_SIZE.y]>;
-
-/// Computes the position Vec2 of a SimChunk from an index, where the index was generated by
-/// uniform_noise.
-fn uniform_idx_as_vec2(idx: usize) -> Vec2<i32> {
-    Vec2::new((idx / WORLD_SIZE.x) as i32, (idx % WORLD_SIZE.x) as i32)
-}
-
-/// Compute inverse cumulative distribution function for arbitrary function f, the hard way.  We
-/// pre-generate noise values prior to worldgen, then sort them in order to determine the correct
-/// position in the sorted order.  That lets us use `(index + 1) / (WORLDSIZE.y * WORLDSIZE.x)` as
-/// a uniformly distributed (from almost-0 to 1) regularization of the chunks.  That is, if we
-/// apply the computed "function" F⁻¹(x, y) to (x, y) and get out p, it means that approximately
-/// (100 * p)% of chunks have a lower value for F⁻¹ than p.  The main purpose of doing this is to
-/// make sure we are using the entire range we want, and to allow us to apply the numerous results
-/// about distributions on uniform functions to the procedural noise we generate, which lets us
-/// much more reliably control the *number* of features in the world while still letting us play
-/// with the *shape* of those features, without having arbitrary cutoff points / discontinuities
-/// (which tend to produce ugly-looking / unnatural terrain).
-///
-/// As a concrete example, before doing this it was very hard to tweak humidity so that either most
-/// of the world wasn't dry, or most of it wasn't wet, by combining the billow noise function and
-/// the computed altitude.  This is because the billow noise function has a very unusual
-/// distribution that is heavily skewed towards 0.  By correcting for this tendency, we can start
-/// with uniformly distributed billow noise and altitudes and combine them to get uniformly
-/// distributed humidity, while still preserving the existing shapes that the billow noise and
-/// altitude functions produce.
-///
-/// f takes an index, which represents the index corresponding to this chunk in any any SimChunk
-/// vector returned by uniform_noise, and (for convenience) the float-translated version of those
-/// coordinates.
-/// f should return a value with no NaNs.  If there is a NaN, it will panic.  There are no other
-/// conditions on f.
-///
-/// Returns a vec of (f32, f32) pairs consisting of the percentage of chunks with a value lower than
-/// this one, and the actual noise value (we don't need to cache it, but it makes ensuring that
-/// subsequent code that needs the noise value actually uses the same one we were using here
-/// easier).
-fn uniform_noise(f: impl Fn(usize, Vec2<f64>) -> f32) -> InverseCdf {
-    let mut noise = (0..WORLD_SIZE.x * WORLD_SIZE.y)
-        .map(|i| {
-            (
-                i,
-                f(
-                    i,
-                    (uniform_idx_as_vec2(i) * TerrainChunkSize::SIZE.map(|e| e as i32))
-                        .map(|e| e as f64),
-                ),
-            )
-        })
-        .collect::<Vec<_>>();
-
-    // sort_unstable_by is equivalent to sort_by here since we include the index in the
-    // comparison.  We could leave out the index, but this might make the order not
-    // reproduce the same way between different versions of Rust (for example).
-    noise.sort_unstable_by(|f, g| (f.1, f.0).partial_cmp(&(g.1, g.0)).unwrap());
-
-    // Construct a vector that associates each chunk position with the 1-indexed
-    // position of the noise in the sorted vector (divided by the vector length).
-    // This guarantees a uniform distribution among the samples.
-    let mut uniform_noise = box [(0.0, 0.0); WORLD_SIZE.x * WORLD_SIZE.y];
-    let total = (WORLD_SIZE.x * WORLD_SIZE.y) as f32;
-    for (noise_idx, (chunk_idx, noise_val)) in noise.into_iter().enumerate() {
-        uniform_noise[chunk_idx] = ((1 + noise_idx) as f32 / total, noise_val);
-    }
-    uniform_noise
-}
-
 /// Calculates the smallest distance along an axis (x, y) from an edge of
 /// the world.  This value is maximal at WORLD_SIZE / 2 and minimized at the extremes
 /// (0 or WORLD_SIZE on one or more axes).  It then divides the quantity by cell_size,
@ -197,6 +46,9 @@ fn map_edge_factor(posi: usize) -> f32 {
        .min(1.0)
 }

+/// A structure that holds cached noise values and cumulative distribution functions for the input
+/// that led to those values.  See the definition of InverseCdf for a description of how to
+/// interpret the types of its fields.
 struct GenCdf {
    humid_base: InverseCdf,
    temp_base: InverseCdf,
@ -666,7 +518,7 @@ impl SimChunk {
        const HUMID_WEIGHTS: [f32; 2] = [1.0, 1.0];
        let humidity = cdf_irwin_hall(&HUMID_WEIGHTS, [humid_base, 1.0 - alt_uniform]);

-        let (temp_base, temp_old) = gen_cdf.temp_base[posi];
+        let (temp_base, _) = gen_cdf.temp_base[posi];

        // We also correlate temperature negatively with altitude using different weighting than we
        // use for humidity.
@ -695,8 +547,6 @@ impl SimChunk {
        let logistic_2_base = 3.0f32.sqrt().mul(f32::consts::FRAC_2_PI);
        // Assumes μ = 0, σ = 1
        let logistic_cdf = |x: f32| x.div(logistic_2_base).tanh().mul(0.5).add(0.5);
-        // Weighted logit sum.
-        let f = |humidity, density| logistic_cdf(logit(humidity) + 0.5 * logit(density));

        // No trees in the ocean or with zero humidity (currently)
        let tree_density = if alt <= CONFIG.sea_level + 5.0 {
@ -716,6 +566,7 @@ impl SimChunk {
            } else if humidity >= 1.0 || tree_density >= 1.0 {
                1.0
            } else {
+                // Weighted logit sum.
                logistic_cdf(logit(humidity) + 0.5 * logit(tree_density))
            }
            // rescale to (-0.9, 0.9)
--- a/world/src/sim/util.rs
+++ b/world/src/sim/util.rs
@ -0,0 +1,161 @@
+use common::{
+    terrain::TerrainChunkSize,
+    vol::VolSize,
+};
+use super::WORLD_SIZE;
+use vek::*;
+
+/// Computes the cumulative distribution function of the weighted sum of k independent,
+/// uniformly distributed random variables between 0 and 1.  For each variable i, we use weights[i]
+/// as the weight to give samples[i] (the weights should all be positive).
+///
+/// If the precondition is met, the distribution of the result of calling this function will be
+/// uniformly distributed while preserving the same information that was in the original average.
+///
+/// For N > 33 the function will no longer return correct results since we will overflow u32.
+///
+/// NOTE:
+///
+/// Per [1], the problem of determing the CDF of
+/// the sum of uniformly distributed random variables over *different* ranges is considerably more
+/// complicated than it is for the same-range case.  Fortunately, it also provides a reference to
+/// [2], which contains a complete derivation of an exact rule for the density function for
+/// this case.  The CDF is just the integral of the cumulative distribution function [3],
+/// which we use to convert this into a CDF formula.
+///
+/// This allows us to sum weighted, uniform, independent random variables.
+///
+/// At some point, we should probably contribute this back to stats-rs.
+///
+/// 1. https://www.r-bloggers.com/sums-of-random-variables/,
+/// 2. Sadooghi-Alvandi, S., A. Nematollahi, & R. Habibi, 2009.
+///    On the Distribution of the Sum of Independent Uniform Random Variables.
+///    Statistical Papers, 50, 171-175.
+/// 3. hhttps://en.wikipedia.org/wiki/Cumulative_distribution_function
+pub fn cdf_irwin_hall<const N: usize>(weights: &[f32; N], samples: [f32; N]) -> f32 {
+    // Let J_k = {(j_1, ... , j_k) : 1 ≤ j_1 < j_2 < ··· < j_k ≤ N }.
+    //
+    // Let A_N = Π{k = 1 to n}a_k.
+    //
+    // The density function for N ≥ 2 is:
+    //
+    //   1/(A_N * (N - 1)!) * (x^(N-1) + Σ{k = 1 to N}((-1)^k *
+    //   Σ{(j_1, ..., j_k) ∈ J_k}(max(0, x - Σ{l = 1 to k}(a_(j_l)))^(N - 1))))
+    //
+    // So the cumulative distribution function is its integral, i.e. (I think)
+    //
+    // 1/(product{k in A}(k) * N!) * (x^N + sum(k in 1 to N)((-1)^k *
+    // sum{j in Subsets[A, {k}]}(max(0, x - sum{l in j}(l))^N)))
+    //
+    // which is also equivalent to
+    //
+    //   (letting B_k = { a in Subsets[A, {k}] : sum {l in a} l }, B_(0,1) = 0 and
+    //            H_k = { i : 1 ≤ 1 ≤ N! / (k! * (N - k)!) })
+    //
+    //   1/(product{k in A}(k) * N!) * sum(k in 0 to N)((-1)^k *
+    //   sum{l in H_k}(max(0, x - B_(k,l))^N))
+    //
+    // We should be able to iterate through the whole power set
+    // instead, and figure out K by calling count_ones(), so we can compute the result in O(2^N)
+    // iterations.
+    let x: f32 = weights
+        .iter()
+        .zip(samples.iter())
+        .map(|(weight, sample)| weight * sample)
+        .sum();
+
+    let mut y = 0.0f32;
+    for subset in 0u32..(1 << N) {
+        // Number of set elements
+        let k = subset.count_ones();
+        // Add together exactly the set elements to get B_subset
+        let z = weights
+            .iter()
+            .enumerate()
+            .filter(|(i, _)| subset & (1 << i) as u32 != 0)
+            .map(|(_, k)| k)
+            .sum::<f32>();
+        // Compute max(0, x - B_subset)^N
+        let z = (x - z).max(0.0).powi(N as i32);
+        // The parity of k determines whether the sum is negated.
+        y += if k & 1 == 0 { z } else { -z };
+    }
+
+    // Divide by the product of the weights.
+    y /= weights.iter().product::<f32>();
+
+    // Remember to multiply by 1 / N! at the end.
+    y / (1..=N as i32).product::<i32>() as f32
+}
+
+/// First component of each element of the vector is the computed CDF of the noise function at this
+/// index (i.e. its position in a sorted list of value returned by the noise function applied to
+/// every chunk in the game).  Second component is the cached value of the noise function that
+/// generated the index.
+pub type InverseCdf = Box<[(f32, f32); WORLD_SIZE.x * WORLD_SIZE.y]>;
+
+/// Computes the position Vec2 of a SimChunk from an index, where the index was generated by
+/// uniform_noise.
+pub fn uniform_idx_as_vec2(idx: usize) -> Vec2<i32> {
+    Vec2::new((idx / WORLD_SIZE.x) as i32, (idx % WORLD_SIZE.x) as i32)
+}
+
+/// Compute inverse cumulative distribution function for arbitrary function f, the hard way.  We
+/// pre-generate noise values prior to worldgen, then sort them in order to determine the correct
+/// position in the sorted order.  That lets us use `(index + 1) / (WORLDSIZE.y * WORLDSIZE.x)` as
+/// a uniformly distributed (from almost-0 to 1) regularization of the chunks.  That is, if we
+/// apply the computed "function" F⁻¹(x, y) to (x, y) and get out p, it means that approximately
+/// (100 * p)% of chunks have a lower value for F⁻¹ than p.  The main purpose of doing this is to
+/// make sure we are using the entire range we want, and to allow us to apply the numerous results
+/// about distributions on uniform functions to the procedural noise we generate, which lets us
+/// much more reliably control the *number* of features in the world while still letting us play
+/// with the *shape* of those features, without having arbitrary cutoff points / discontinuities
+/// (which tend to produce ugly-looking / unnatural terrain).
+///
+/// As a concrete example, before doing this it was very hard to tweak humidity so that either most
+/// of the world wasn't dry, or most of it wasn't wet, by combining the billow noise function and
+/// the computed altitude.  This is because the billow noise function has a very unusual
+/// distribution that is heavily skewed towards 0.  By correcting for this tendency, we can start
+/// with uniformly distributed billow noise and altitudes and combine them to get uniformly
+/// distributed humidity, while still preserving the existing shapes that the billow noise and
+/// altitude functions produce.
+///
+/// f takes an index, which represents the index corresponding to this chunk in any any SimChunk
+/// vector returned by uniform_noise, and (for convenience) the float-translated version of those
+/// coordinates.
+/// f should return a value with no NaNs.  If there is a NaN, it will panic.  There are no other
+/// conditions on f.
+///
+/// Returns a vec of (f32, f32) pairs consisting of the percentage of chunks with a value lower than
+/// this one, and the actual noise value (we don't need to cache it, but it makes ensuring that
+/// subsequent code that needs the noise value actually uses the same one we were using here
+/// easier).
+pub fn uniform_noise(f: impl Fn(usize, Vec2<f64>) -> f32) -> InverseCdf {
+    let mut noise = (0..WORLD_SIZE.x * WORLD_SIZE.y)
+        .map(|i| {
+            (
+                i,
+                f(
+                    i,
+                    (uniform_idx_as_vec2(i) * TerrainChunkSize::SIZE.map(|e| e as i32))
+                        .map(|e| e as f64),
+                ),
+            )
+        })
+        .collect::<Vec<_>>();
+
+    // sort_unstable_by is equivalent to sort_by here since we include the index in the
+    // comparison.  We could leave out the index, but this might make the order not
+    // reproduce the same way between different versions of Rust (for example).
+    noise.sort_unstable_by(|f, g| (f.1, f.0).partial_cmp(&(g.1, g.0)).unwrap());
+
+    // Construct a vector that associates each chunk position with the 1-indexed
+    // position of the noise in the sorted vector (divided by the vector length).
+    // This guarantees a uniform distribution among the samples.
+    let mut uniform_noise = Box::new([(0.0, 0.0); WORLD_SIZE.x * WORLD_SIZE.y]);
+    let total = (WORLD_SIZE.x * WORLD_SIZE.y) as f32;
+    for (noise_idx, (chunk_idx, noise_val)) in noise.into_iter().enumerate() {
+        uniform_noise[chunk_idx] = ((1 + noise_idx) as f32 / total, noise_val);
+    }
+    uniform_noise
+}