Improving meshing performance.

Joshua Yanovski 2022-08-13 23:48:50 -07:00
parent 9bb3681f55
commit 706084dd15
23 changed files with 996 additions and 293 deletions

Cargo.lock (generated)
View File

@ -155,6 +155,11 @@ dependencies = [
"num-traits",
]
[[package]]
name = "arbalest"
version = "0.2.1"
source = "git+https://gitlab.com/veloren/arbalest.git?rev=9cb8f67a4f6d8f3cc908dac4eb5eb8aec9fab07b#9cb8f67a4f6d8f3cc908dac4eb5eb8aec9fab07b"
[[package]]
name = "arr_macro"
version = "0.1.3"
@ -6510,6 +6515,7 @@ name = "veloren-common"
version = "0.10.0"
dependencies = [
"approx 0.4.0",
"arbalest",
"bitflags",
"bitvec",
"bumpalo",
@ -6824,6 +6830,7 @@ dependencies = [
"assets_manager",
"backtrace",
"bincode",
"bumpalo",
"bytemuck",
"chrono",
"chumsky",

View File

@ -1,6 +1,6 @@
Boost(
movement_duration: 0.05,
only_up: false,
speed: 400.0,
speed: 3400.0,
max_exit_velocity: 100.0,
)

View File

@ -3,7 +3,7 @@
(
caverns: false, // TODO: Disabled by default until cave overhaul
caves: true,
caves: false,
rocks: true,
shrubs: true,
trees: true,

View File

@ -280,7 +280,8 @@ pub struct CharacterList {
pub loading: bool,
}
const TOTAL_PENDING_CHUNKS_LIMIT: usize = 1024;
/// Higher than what's needed at VD = 65.
const TOTAL_PENDING_CHUNKS_LIMIT: usize = /*1024*/13800;
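As a rough sanity check on the new limit (a sketch only; it assumes the client keeps roughly a circular disc of chunks of radius equal to the view distance, which may not be the exact loading shape):

fn pending_chunk_estimate(view_distance: u32) -> usize {
    // Area of a disc of radius `view_distance` chunks, rounded up.
    (core::f64::consts::PI * f64::from(view_distance).powi(2)).ceil() as usize
}

#[test]
fn limit_covers_vd_65() {
    // pi * 65^2 is roughly 13273, so the new limit of 13800 covers it with some margin.
    assert!(pending_chunk_estimate(65) <= TOTAL_PENDING_CHUNKS_LIMIT);
}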
impl Client {
pub async fn new(
@ -1874,8 +1875,8 @@ impl Client {
if !skip_mode && !self.pending_chunks.contains_key(key) {
const CURRENT_TICK_PENDING_CHUNKS_LIMIT: usize = 8 * 4;
if self.pending_chunks.len() < TOTAL_PENDING_CHUNKS_LIMIT
&& current_tick_send_chunk_requests
< CURRENT_TICK_PENDING_CHUNKS_LIMIT
&& /* current_tick_send_chunk_requests
< CURRENT_TICK_PENDING_CHUNKS_LIMIT */true
{
self.send_msg_err(ClientGeneral::TerrainChunkRequest {
key: *key,

View File

@ -79,6 +79,7 @@ petgraph = { version = "0.6", optional = true }
kiddo = { version = "0.1", optional = true }
# Data structures
arbalest = { git = "https://gitlab.com/veloren/arbalest.git", rev = "9cb8f67a4f6d8f3cc908dac4eb5eb8aec9fab07b", features = ["nightly"] }
hashbrown = { version = "0.12", features = ["rayon", "serde", "nightly"] }
slotmap = { version = "1.0", features = ["serde"] }
indexmap = { version = "1.3.0", features = ["rayon"] }

View File

@ -8,6 +8,8 @@
array_chunks,
associated_type_defaults,
bool_to_option,
coerce_unsized,
dispatch_from_dyn,
fundamental,
generic_const_exprs,
generic_arg_infer,
@ -17,6 +19,7 @@
slice_as_chunks,
trait_alias,
type_alias_impl_trait,
unsize,
extend_one,
arbitrary_enum_discriminant,
generic_associated_types,

View File

@ -1,4 +1,11 @@
use hashbrown::HashMap;
use arbalest::sync::{Strong, Frail};
use core::{
fmt,
marker::Unsize,
ops::{CoerceUnsized, DispatchFromDyn},
sync::atomic::{AtomicBool, Ordering}
};
use hashbrown::{hash_map::Entry, HashMap};
use rayon::ThreadPool;
use std::{
collections::VecDeque,
@ -47,25 +54,27 @@ use tracing::{error, warn};
#[derive(Clone)]
pub struct SlowJobPool {
internal: Arc<Mutex<InternalSlowJobPool>>,
threadpool: Arc<ThreadPool>,
}
type Name = /*String*/&'static str;
#[derive(Debug)]
pub struct SlowJob {
name: String,
id: u64,
task: Frail<Queue>,
}
// impl<T: ?Sized + Unsize<U> + CoerceUnsized<U>, U: ?Sized> CoerceUnsized<Task<U>> for Task<T> {}
struct InternalSlowJobPool {
next_id: u64,
queue: HashMap<String, VecDeque<Queue>>,
configs: HashMap<String, Config>,
last_spawned_configs: Vec<String>,
cur_slot: usize,
queue: HashMap<Name, VecDeque<Strong<Queue>>>,
configs: HashMap<Name, Config>,
last_spawned_configs: Vec<Name>,
global_spawned_and_running: u64,
global_limit: u64,
jobs_metrics_cnt: usize,
jobs_metrics: HashMap<String, Vec<JobMetrics>>,
threadpool: Arc<ThreadPool>,
internal: Option<Arc<Mutex<Self>>>,
jobs_metrics: HashMap<Name, Vec<JobMetrics>>,
}
#[derive(Debug)]
@ -74,46 +83,42 @@ struct Config {
local_spawned_and_running: u64,
}
struct Queue {
id: u64,
name: String,
task: Box<dyn FnOnce() + Send + Sync + 'static>,
#[derive(Debug)]
struct Task<F: ?Sized> {
queue_created: Instant,
/// Has this task been canceled?
is_canceled: AtomicBool,
/// The actual task function. Technically, the wrapping Option is unnecessary, since we'll
/// only ever run it once, but avoiding it would require unsafe, which the performance
/// improvement doesn't justify in this case.
task: F,
}
/// NOTE: Should be FnOnce, but can't because there's no easy way to run an FnOnce function on an
/// Arc even if [try_unwrap] would work. We could write unsafe code to do this, but it probably
/// isn't worth it.
type Queue = Task<dyn FnMut() + Send + Sync + 'static>;
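A minimal standalone sketch of the workaround the NOTE above describes (illustrative, not part of the diff): an FnOnce cannot be called through a mutable reference, because calling it moves it, but stashing it in an Option yields an FnMut that runs the closure at most once.

fn into_callable_once<F: FnOnce()>(f: F) -> impl FnMut() {
    let mut f = Some(f);
    move || {
        // take() moves the closure out the first time; later calls are no-ops.
        if let Some(f) = f.take() {
            f();
        }
    }
}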
pub struct JobMetrics {
pub queue_created: Instant,
pub execution_start: Instant,
pub execution_end: Instant,
}
impl Queue {
fn new<F>(name: &str, id: u64, internal: &Arc<Mutex<InternalSlowJobPool>>, f: F) -> Self
where
F: FnOnce() + Send + Sync + 'static,
impl<F> Task<F> {
fn new(f: F) -> Task<impl FnMut() + Send + Sync + 'static>
where F: FnOnce() + Send + Sync + 'static
{
let internal = Arc::clone(internal);
let name_cloned = name.to_owned();
let queue_created = Instant::now();
Self {
id,
name: name.to_owned(),
task: Box::new(move || {
common_base::prof_span_alloc!(_guard, &name_cloned);
let execution_start = Instant::now();
f();
let execution_end = Instant::now();
let metrics = JobMetrics {
let mut f = Some(f);
Task {
queue_created,
execution_start,
execution_end,
};
// directly maintain the next task afterwards
{
let mut lock = internal.lock().expect("slowjob lock poisoned");
lock.finish(&name_cloned, metrics);
lock.spawn_queued();
is_canceled: AtomicBool::new(false),
task: move || {
// Working around not being able to call FnOnce in an Arc.
if let Some(f) = f.take() {
f();
}
}),
},
}
}
}
@ -122,41 +127,33 @@ impl InternalSlowJobPool {
pub fn new(
global_limit: u64,
jobs_metrics_cnt: usize,
threadpool: Arc<ThreadPool>,
) -> Arc<Mutex<Self>> {
let link = Arc::new(Mutex::new(Self {
next_id: 0,
) -> Self {
Self {
queue: HashMap::new(),
configs: HashMap::new(),
cur_slot: 0,
last_spawned_configs: Vec::new(),
global_spawned_and_running: 0,
global_limit: global_limit.max(1),
jobs_metrics_cnt,
jobs_metrics: HashMap::new(),
threadpool,
internal: None,
}));
let link_clone = Arc::clone(&link);
link.lock()
.expect("poisoned on InternalSlowJobPool::new")
.internal = Some(link_clone);
link
}
}
/// returns order of configuration which are queued next
fn calc_queued_order(
&self,
mut queued: HashMap<&String, u64>,
mut queued: HashMap<&Name, u64>,
mut limit: usize,
) -> Vec<String> {
) -> Vec<Name> {
let mut roundrobin = self.last_spawned_configs.clone();
let mut result = vec![];
let spawned = self
/* let spawned = self
.configs
.iter()
.map(|(n, c)| (n, c.local_spawned_and_running))
.collect::<HashMap<_, u64>>();
.collect::<HashMap<_, u64>>(); */
let spawned = &self.configs;
let mut queried_capped = self
.configs
.iter()
@ -174,7 +171,7 @@ impl InternalSlowJobPool {
// grab all configs that are queued and not running. in roundrobin order
for n in roundrobin.clone().into_iter() {
if let Some(c) = queued.get_mut(&n) {
if *c > 0 && spawned.get(&n).cloned().unwrap_or(0) == 0 {
if *c > 0 && spawned.get(&n).map(|c| c.local_spawned_and_running).unwrap_or(0) == 0 {
result.push(n.clone());
*c -= 1;
limit -= 1;
@ -212,7 +209,7 @@ impl InternalSlowJobPool {
match spawn_rates.first_mut() {
Some((n, r)) => {
if *r > f32::EPSILON {
result.push(n.clone());
result.push(n.to_owned());
limit -= 1;
*r -= 1.0;
} else {
@ -225,7 +222,7 @@ impl InternalSlowJobPool {
result
}
fn can_spawn(&self, name: &str) -> bool {
fn can_spawn(&self, name: &Name) -> bool {
let queued = self
.queue
.iter()
@ -247,31 +244,33 @@ impl InternalSlowJobPool {
to_be_queued_cnt > queued_cnt
}
pub fn spawn<F>(&mut self, name: &str, f: F) -> SlowJob
fn spawn<F>(&mut self, slowjob: &SlowJobPool, push_back: bool, name: &Name, f: F) -> SlowJob
where
F: FnOnce() + Send + Sync + 'static,
{
let id = self.next_id;
self.next_id += 1;
let queue = Queue::new(name, id, self.internal.as_ref().expect("internal empty"), f);
self.queue
.entry(name.to_string())
.or_default()
.push_back(queue);
let queue: Strong<Queue> = Strong::new(Task::new(f));
let mut deque = self.queue
.entry(name.to_owned())
.or_default();
let job = SlowJob {
task: Strong::downgrade(&queue)
};
if push_back {
deque.push_back(queue);
} else {
deque.push_front(queue);
}
debug_assert!(
self.configs.contains_key(name),
"Can't spawn unconfigured task!"
);
//spawn already queued
self.spawn_queued();
SlowJob {
name: name.to_string(),
id,
}
self.spawn_queued(slowjob);
job
}
fn finish(&mut self, name: &str, metrics: JobMetrics) {
let metric = self.jobs_metrics.entry(name.to_string()).or_default();
fn finish(&mut self, name: &Name, metrics: JobMetrics) {
let metric = self.jobs_metrics.entry(name.to_owned()).or_default();
if metric.len() < self.jobs_metrics_cnt {
metric.push(metrics);
@ -284,8 +283,87 @@ impl InternalSlowJobPool {
}
}
fn spawn_queued(&mut self) {
let queued = self
/// NOTE: This does not spawn the job directly, but it *does* increment cur_slot and the local
/// and global task counters, so make sure to actually finish the returned jobs if you consume
/// the iterator, or the position in the queue may be off!
#[must_use = "Remember to actually use the returned jobs if you consume the iterator."]
fn next_jobs<'a>(&'a mut self) -> impl Iterator<Item = (Name, Strong<Queue>)> + 'a {
let queued = &mut self.queue;
let configs = &mut self.configs;
let global_spawned_and_running = &mut self.global_spawned_and_running;
let cur_slot = &mut self.cur_slot;
let num_slots = self.last_spawned_configs.len();
let jobs_limit = self.global_limit.saturating_sub(*global_spawned_and_running) as usize;
let queued_order = self.last_spawned_configs.iter().cycle().skip(*cur_slot).take(num_slots);
queued_order
// NOTE: num_slots > 0, because queued_order can only yield up to num_slots elements.
.inspect(move |_| { *cur_slot = (*cur_slot + 1) % num_slots; })
.filter_map(move |name| {
let deque = queued.get_mut(name)?;
let config = configs.get_mut(name)?;
if /* config.local_spawned_and_running < config.local_limit*/true {
let task = deque.pop_front()?;
config.local_spawned_and_running += 1;
*global_spawned_and_running += 1;
Some((name.to_owned(), task))
} else {
None
}
})
.take(jobs_limit)
}
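A standalone sketch of the slot bookkeeping used by next_jobs above, with a plain list of names standing in for the configured queues (names and signature are illustrative): the iterator resumes at cur_slot and advances it for every slot it examines, so successive calls continue the round-robin where the previous one stopped.

fn round_robin(names: &[&'static str], cur_slot: &mut usize, limit: usize) -> Vec<&'static str> {
    let num_slots = names.len();
    names
        .iter()
        .cycle()
        .skip(*cur_slot)
        .take(num_slots)
        // Advance the slot as each name is examined, so the next call resumes after it.
        .inspect(|_| *cur_slot = (*cur_slot + 1) % num_slots)
        .copied()
        .take(limit)
        .collect()
}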
/// Spawn tasks in the threadpool, in round-robin order.
///
/// NOTE: Do *not* call this from an existing thread in the threadpool.
fn spawn_queued(&mut self, slowjob: &SlowJobPool) {
/* let old_running = self.global_spawned_and_running; */
while self.next_jobs().map(|task| slowjob.spawn_in_threadpool(task)).count() != 0 {}
/* let total_spawned = (self.global_spawned_and_running - old_running) as usize;
self.cur_slot = (initial_slot + total_spawned) % num_slots;
self.cur_slot %= num_slots; */
/* let queued = self
.queue
.iter_mut();
/* .iter();
.map(|(n, m)| (n, m.len() as u64))
.collect::<HashMap<_, u64>>();
let limit = self.global_limit as usize;
let queued_order = self.calc_queued_order(queued, limit); */
let queued_order = queued;
for (name, deque) in queued_order/*.into_iter()*/.take(self.global_limit.saturating_sub(self.global_spawned_and_running) as usize) {
/* match self.queue.get_mut(&name) {
Some(deque) => */match deque.pop_front() {
Some(queue) => {
//fire
self.global_spawned_and_running += 1;
self.configs
.get_mut(&queue.name)
.expect("cannot fire an unconfigured job")
.local_spawned_and_running += 1;
self.last_spawned_configs
.iter()
.position(|e| e == &queue.name)
.map(|i| self.last_spawned_configs.remove(i));
self.last_spawned_configs.push((&queue.name).to_owned());
self.threadpool.spawn(queue.task);
},
None => /* error!(
"internal calculation is wrong, we extected a schedulable job to be \
present in the queue"
),*/{}
}/*,
None => error!(
"internal calculation is wrong, we marked a queue as schedulable which \
doesn't exist"
),
} */
} */
/* let queued = self
.queue
.iter()
.map(|(n, m)| (n, m.len() as u64))
@ -306,7 +384,7 @@ impl InternalSlowJobPool {
.iter()
.position(|e| e == &queue.name)
.map(|i| self.last_spawned_configs.remove(i));
self.last_spawned_configs.push(queue.name.to_owned());
self.last_spawned_configs.push((&queue.name).to_owned());
self.threadpool.spawn(queue.task);
},
None => error!(
@ -319,77 +397,186 @@ impl InternalSlowJobPool {
doesn't exist"
),
}
}
} */
}
pub fn take_metrics(&mut self) -> HashMap<String, Vec<JobMetrics>> {
pub fn take_metrics(&mut self) -> HashMap<Name, Vec<JobMetrics>> {
core::mem::take(&mut self.jobs_metrics)
}
}
impl SlowJob {
/// TODO: This would be simplified (and perform a bit better) if there existed a structure that
/// "split" an Arc allocation into two parts, a shared and owned part (with types corresponding
/// to references to each). The strong type would not be cloneable and would grant mutable
/// access to the owned part, and shared access to the shared part; the weak type would be
/// cloneable, but would grant only shared access to the shared part, and no access to the owned
/// part. This would allow us to share the allocation, without needing to keep track of an
/// explicit weak pointer count, perform any sort of locking on cancelation, etc.
/// Unfortunately I cannot find such a type on crates.io, and writing one would be a fairly
/// involved task, so we defer this for now.
pub fn cancel(self) -> Result<(), Self> {
// Correctness of cancellation is a bit subtle, due to wanting to avoid locking the queue
// more than necessary, iterating over jobs, or introducing a way to access jobs by key.
//
// First, we try to upgrade our weak reference to the Arc. This will fail if the strong
// reference is currently mutably borrowed, or if the strong side has already been
// dropped. Since we never mutably borrow the reference until we're definitely going to
// run the task, and we only drop the strong side after the task is complete, this is
// a conservative signal that there's no point in cancelling the task, so this has no
// false positives.
let task = self.task.try_upgrade().or(Err(self))?;
// Now that the task is upgraded, any attempt by the strong side to mutably access the
// task will fail, so it will assume it's been canceled. This is fine, because we're
// about to cancel it anyway.
//
// Next, we try to signal (monotonically) that the task should be cancelled, by updating
// the value of canceled atomically to true. Since this is monotonic, we can use Relaxed
// here. It would actually be fine if this signal was lost, since cancellation is always
// an optimization, but with the current implementation it won't be--the strong side only
// checks for cancellation after it tries to mutably access the task, which can't happen
// while the task is "locked" by our weak upgrade, so our write here will always be
// visible.
task.is_canceled.store(true, Ordering::Relaxed);
// Note that we don't bother to check whether the task was already canceled. Firstly,
// because we don't care, and secondly because even if we did, this function takes ownership of
// the SlowJob, which contains the only weak reference with the ability to cancel, so no
// job can be canceled more than once anyway.
Ok(())
}
}
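A simplified model of the cancellation handshake above, using std's Weak in place of arbalest's Frail (the real Frail::try_upgrade additionally fails while the strong side is mutably borrowed, which is what makes the cancel signal reliable; this sketch only shows the overall shape of the protocol):

use std::sync::{Weak, atomic::{AtomicBool, Ordering}};

struct TaskModel {
    is_canceled: AtomicBool,
}

fn cancel_model(task: Weak<TaskModel>) -> Result<(), ()> {
    // If the worker has already dropped its strong handle, the task ran (or is about
    // to finish); cancelling is pointless, so report failure.
    let task = task.upgrade().ok_or(())?;
    // Monotonic flag; Relaxed suffices because a lost signal would only cost an
    // optimization, never correctness.
    task.is_canceled.store(true, Ordering::Relaxed);
    Ok(())
}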
impl SlowJobPool {
pub fn new(global_limit: u64, jobs_metrics_cnt: usize, threadpool: Arc<ThreadPool>) -> Self {
Self {
internal: InternalSlowJobPool::new(global_limit, jobs_metrics_cnt, threadpool),
internal: Arc::new(Mutex::new(InternalSlowJobPool::new(global_limit, jobs_metrics_cnt))),
threadpool,
}
}
/// configure a NAME to spawn up to f(n) threads, depending on how many
/// threads we globally have available
pub fn configure<F>(&self, name: &str, f: F)
pub fn configure<F>(&self, name: &Name, f: F)
where
F: Fn(u64) -> u64,
{
let mut lock = self.internal.lock().expect("lock poisoned while configure");
let lock = &mut *lock;
// Make sure not to update already-present config entries, since this can mess up some of
// the stuff we do that assumes monotonicity.
if let Entry::Vacant(v) = lock.configs.entry(name.to_owned()) {
let cnf = Config {
local_limit: f(lock.global_limit).max(1),
local_spawned_and_running: 0,
};
lock.configs.insert(name.to_owned(), cnf);
lock.last_spawned_configs.push(name.to_owned());
let cnf = v.insert(cnf);
// Add cnf into the entry list as many times as its local limit, to ensure that stuff
// gets run more often if it has more assigned threads.
lock.last_spawned_configs.resize(lock.last_spawned_configs.len() + /* cnf.local_limit as usize */1, name.to_owned());
}
}
/// Spawn a task in the threadpool.
///
/// This runs the task, and then checks at the end to see if there are any more tasks to run
/// before returning for good. In cases with lots of tasks, this may help avoid unnecessary
/// context switches or extra threads being spawned unintentionally.
fn spawn_in_threadpool(&self, mut name_task: (Name, Strong<Queue>)) {
let internal = Arc::clone(&self.internal);
// NOTE: It's important not to use internal until we're in the spawned thread, since the
// lock is probably currently taken!
self.threadpool.spawn(move || {
// Repeatedly run until exit; we do things this way to avoid recursion, which might blow
// our call stack.
loop {
let (name, mut task) = name_task;
let queue_created = task.queue_created;
// See the [SlowJob::cancel] method for justification for this step's correctness.
//
// NOTE: This is not exact because we do it before borrowing the task, but the
// difference is minor and it makes it easier to assign metrics to canceled tasks
// (though maybe we don't want to do that?).
let execution_start = Instant::now();
if let Some(mut task) = Strong::try_borrow_mut(&mut task)
.ok()
.filter(|task| !task.is_canceled.load(Ordering::Relaxed)) {
// The task was not canceled.
//
// Run the task in its own scope so perf works correctly.
common_base::prof_span_alloc!(_guard, &name);
(task.task)();
}
let execution_end = Instant::now();
let metrics = JobMetrics {
queue_created,
execution_start,
execution_end,
};
// directly maintain the next task afterwards
let next_task = {
// We take the lock in this scope to make sure it's dropped before we
// actually launch the next job.
let mut lock = internal.lock().expect("slowjob lock poisoned");
let lock = &mut *lock;
lock.finish(&name, metrics);
let mut jobs = lock.next_jobs();
jobs.next()
};
name_task = if let Some(name_task) = next_task {
// We launch the job on the *same* thread, since we're already in the
// thread pool.
name_task
} else {
// There are no more tasks to run at this time, so we exit the thread in
// the threadpool (in theory, it might make sense to yield or spin a few
// times or something in case we have more tasks to execute).
return;
};
}
});
}
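The same stay-on-the-worker pattern in miniature, with a plain mutex-protected queue standing in for the pool internals (all names here are illustrative):

use std::{collections::VecDeque, sync::{Arc, Mutex}};

type Job = Box<dyn FnOnce() + Send>;

fn run_until_drained(queue: Arc<Mutex<VecDeque<Job>>>, first: Job) {
    let mut job = first;
    // A loop rather than recursion, so a long chain of jobs cannot blow the stack.
    loop {
        job();
        // Hold the lock only long enough to fetch the next job, and release it before
        // running, mirroring spawn_in_threadpool above.
        job = match queue.lock().unwrap().pop_front() {
            Some(next) => next,
            None => return,
        };
    }
}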
/// spawn a new slow job on a certain NAME IF it can run immediately
#[allow(clippy::result_unit_err)]
pub fn try_run<F>(&self, name: &str, f: F) -> Result<SlowJob, ()>
pub fn try_run<F>(&self, name: &Name, f: F) -> Result<SlowJob, ()>
where
F: FnOnce() + Send + Sync + 'static,
{
let mut lock = self.internal.lock().expect("lock poisoned while try_run");
let lock = &mut *lock;
//spawn already queued
lock.spawn_queued();
lock.spawn_queued(self);
if lock.can_spawn(name) {
Ok(lock.spawn(name, f))
Ok(lock.spawn(self, true, name, f))
} else {
Err(())
}
}
pub fn spawn<F>(&self, name: &str, f: F) -> SlowJob
pub fn spawn<F>(&self, name: &Name, f: F) -> SlowJob
where
F: FnOnce() + Send + Sync + 'static,
{
self.internal
.lock()
.expect("lock poisoned while spawn")
.spawn(name, f)
.spawn(self, true, name, f)
}
pub fn cancel(&self, job: SlowJob) -> Result<(), SlowJob> {
let mut lock = self.internal.lock().expect("lock poisoned while cancel");
if let Some(m) = lock.queue.get_mut(&job.name) {
let p = match m.iter().position(|p| p.id == job.id) {
Some(p) => p,
None => return Err(job),
};
if m.remove(p).is_some() {
return Ok(());
}
}
Err(job)
/// Spawn at the front of the queue, which is preferable in some cases.
pub fn spawn_front<F>(&self, name: &Name, f: F) -> SlowJob
where
F: FnOnce() + Send + Sync + 'static,
{
self.internal
.lock()
.expect("lock poisoned while spawn")
.spawn(self, false, name, f)
}
pub fn take_metrics(&self) -> HashMap<String, Vec<JobMetrics>> {
pub fn take_metrics(&self) -> HashMap<Name, Vec<JobMetrics>> {
self.internal
.lock()
.expect("lock poisoned while take_metrics")
@ -413,7 +600,7 @@ mod tests {
.num_threads(pool_threads)
.build()
.unwrap();
let pool = SlowJobPool::new(global_threads, metrics, Arc::new(threadpool));
let pool = SlowJobPool::new(global_threads, metrics, threadpool);
if foo != 0 {
pool.configure("FOO", |x| x / foo);
}

View File

@ -55,7 +55,7 @@ impl ChunkGenerator {
v.insert(Arc::clone(&cancel));
let chunk_tx = self.chunk_tx.clone();
self.metrics.chunks_requested.inc();
slowjob_pool.spawn("CHUNK_GENERATOR", move || {
slowjob_pool.spawn(&"CHUNK_GENERATOR", move || {
let index = index.as_index_ref();
let payload = world
.generate_chunk(index, key, || cancel.load(Ordering::Relaxed), Some(time))

View File

@ -293,8 +293,8 @@ impl Server {
}
{
let pool = state.ecs_mut().write_resource::<SlowJobPool>();
pool.configure("CHUNK_GENERATOR", |n| n / 2 + n / 4);
pool.configure("CHUNK_SERIALIZER", |n| n / 2);
pool.configure(&"CHUNK_GENERATOR", |n| n / 2 + n / 4);
pool.configure(&"CHUNK_SERIALIZER", |n| n / 2);
}
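A worked example of the per-name limits these closures imply (illustrative only; the pool's global limit still caps the total):

#[test]
fn chunk_worker_limits() {
    let chunk_generator = |n: u64| n / 2 + n / 4;
    let chunk_serializer = |n: u64| n / 2;
    // With 16 threads available globally, generation may use up to 12 of them and
    // serialization up to 8; with 8 threads, 6 and 4 respectively.
    assert_eq!((chunk_generator(16), chunk_serializer(16)), (12, 8));
    assert_eq!((chunk_generator(8), chunk_serializer(8)), (6, 4));
}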
state
.ecs_mut()

View File

@ -117,7 +117,7 @@ impl<'a> System<'a> for Sys {
while chunks_iter.peek().is_some() {
let chunks: Vec<_> = chunks_iter.by_ref().take(CHUNK_SIZE).collect();
let chunk_sender = chunk_sender.clone();
slow_jobs.spawn("CHUNK_SERIALIZER", move || {
slow_jobs.spawn(&"CHUNK_SERIALIZER", move || {
for (chunk, chunk_key, mut meta) in chunks {
let msg = Client::prepare_chunk_update_msg(
ServerGeneral::TerrainChunkUpdate {

View File

@ -95,6 +95,7 @@ server = { package = "veloren-server", path = "../server", optional = true, defa
assets_manager = {version = "0.8", features = ["ab_glyph"]}
backtrace = "0.3.40"
bincode = "1.3.1"
bumpalo = "3.10.0"
chrono = { version = "0.4.19", features = ["serde"] }
chumsky = "0.8"
cpal = "0.13"

View File

@ -11,9 +11,9 @@ pub fn init(world: &mut World) {
{
let pool = world.read_resource::<SlowJobPool>();
pool.configure("IMAGE_PROCESSING", |n| n / 2);
pool.configure("FIGURE_MESHING", |n| n / 2);
pool.configure("TERRAIN_MESHING", |n| n / 2);
pool.configure(&"IMAGE_PROCESSING", |n| n / 2);
pool.configure(&"FIGURE_MESHING", |n| n / 4);
pool.configure(&"TERRAIN_MESHING", |n| n / 2);
}
// Voxygen event buses

View File

@ -4295,11 +4295,10 @@ impl Hud {
// TODO: using a thread pool in the obvious way for speeding up map zoom results
// in flickering artifacts, figure out a better way to make use of the
// thread pool
let _pool = client.state().ecs().read_resource::<SlowJobPool>();
let pool = client.state().slow_job_pool();
self.ui.maintain(
global_state.window.renderer_mut(),
None,
//Some(&pool),
Some(&pool),
Some(proj_mat * view_mat * Mat4::translation_3d(-focus_off)),
);

View File

@ -4,6 +4,7 @@
#![deny(clippy::clone_on_ref_ptr)]
#![feature(
array_methods,
array_windows,
array_zip,
bool_to_option,
drain_filter,

View File

@ -1728,12 +1728,13 @@ impl CharSelectionUi {
pub fn maintain(&mut self, global_state: &mut GlobalState, client: &Client) -> Vec<Event> {
let mut events = Vec::new();
let i18n = global_state.i18n.read();
let pool = client.state().slow_job_pool();
let (mut messages, _) = self.ui.maintain(
self.controls
.view(&global_state.settings, client, &self.error, &i18n),
global_state.window.renderer_mut(),
None,
Some(&pool),
&mut global_state.clipboard,
);

View File

@ -9,7 +9,7 @@ type TodoRect = (
Vec3<i32>,
);
pub struct GreedyConfig<D, FA, FL, FG, FO, FS, FP, FT> {
pub struct GreedyConfig<D, FV, FA, FL, FG, FO, FS, FP, FT> {
pub data: D,
/// The minimum position to mesh, in the coordinate system used
/// for queries against the volume.
@ -31,6 +31,9 @@ pub struct GreedyConfig<D, FA, FL, FG, FO, FS, FP, FT> {
/// the number of *horizontal* planes large enough to cover the whole
/// chunk.
pub greedy_size_cross: Vec3<usize>,
/// Given a position, return the full information for the voxel at that
/// position.
pub get_vox: FV,
/// Given a position, return the AO information for the voxel at that
/// position (0.0 - 1.0).
pub get_ao: FA,
@ -104,7 +107,23 @@ fn guillotiere_size<T: Into<i32>>(size: Vec2<T>) -> guillotiere::Size {
guillotiere::Size::new(size.x.into(), size.y.into())
}
/// Currently used by terrain/particles/figures
/// Currently used by terrain
pub fn terrain_config() -> guillotiere::AllocatorOptions {
// TODO: Collect information to see if we can choose a good value here. These
// current values were optimized for sprites, but we are using a
// different allocator for them so different values might be better
// here.
let large_size_threshold = /*16*//*32*/8; //256.min(min_max_dim / 2 + 1);
let small_size_threshold = /*4*//*4*/3; //33.min(large_size_threshold / 2 + 1);
guillotiere::AllocatorOptions {
alignment: guillotiere::Size::new(1, 1),
small_size_threshold,
large_size_threshold,
}
}
/// Currently used by particles/figures
pub fn general_config() -> guillotiere::AllocatorOptions {
// TODO: Collect information to see if we can choose a good value here. These
// current values were optimized for sprites, but we are using a
@ -370,15 +389,16 @@ impl<'a, Allocator: AtlasAllocator> GreedyMesh<'a, Allocator> {
/// Returns an estimate of the bounds of the current meshed model.
///
/// For more information on the config parameter, see [GreedyConfig].
pub fn push<M: PartialEq, D: 'a, FA, FL, FG, FO, FS, FP, FT>(
pub fn push<M: PartialEq, D: 'a, V: 'a, FV, FA, FL, FG, FO, FS, FP, FT>(
&mut self,
config: GreedyConfig<D, FA, FL, FG, FO, FS, FP, FT>,
config: GreedyConfig<D, FV, FA, FL, FG, FO, FS, FP, FT>,
) where
FV: for<'r> FnMut(&'r mut D, Vec3<i32>) -> V + 'a,
FA: for<'r> FnMut(&'r mut D, Vec3<i32>) -> f32 + 'a,
FL: for<'r> FnMut(&'r mut D, Vec3<i32>) -> f32 + 'a,
FG: for<'r> FnMut(&'r mut D, Vec3<i32>) -> f32 + 'a,
FO: for<'r> FnMut(&'r mut D, Vec3<i32>) -> bool + 'a,
FS: for<'r> FnMut(&'r mut D, Vec3<i32>, Vec3<i32>, Vec2<Vec3<i32>>) -> Option<(bool, M)>,
FS: for<'r> FnMut(&'r mut D, Vec3<i32>, V, V, /*Vec3<i32>, */Vec2<Vec3<i32>>) -> Option<(bool, M)>,
FP: FnMut(Vec2<u16>, Vec2<Vec2<u16>>, Vec3<f32>, Vec2<Vec3<f32>>, Vec3<f32>, &M),
FT: for<'r> FnMut(&'r mut D, Vec3<i32>, u8, u8, bool) -> [u8; 4] + 'a,
{
@ -418,7 +438,7 @@ impl<'a, Allocator: AtlasAllocator> GreedyMesh<'a, Allocator> {
pub fn max_size(&self) -> Vec2<u16> { self.max_size }
}
fn greedy_mesh<'a, M: PartialEq, D: 'a, FA, FL, FG, FO, FS, FP, FT, Allocator: AtlasAllocator>(
fn greedy_mesh<'a, M: PartialEq, D: 'a, V: 'a, FV, FA, FL, FG, FO, FS, FP, FT, Allocator: AtlasAllocator>(
atlas: &mut Allocator,
col_lights_size: &mut Vec2<u16>,
max_size: Vec2<u16>,
@ -427,6 +447,7 @@ fn greedy_mesh<'a, M: PartialEq, D: 'a, FA, FL, FG, FO, FS, FP, FT, Allocator: A
draw_delta,
greedy_size,
greedy_size_cross,
mut get_vox,
get_ao,
get_light,
get_glow,
@ -434,14 +455,15 @@ fn greedy_mesh<'a, M: PartialEq, D: 'a, FA, FL, FG, FO, FS, FP, FT, Allocator: A
mut should_draw,
mut push_quad,
make_face_texel,
}: GreedyConfig<D, FA, FL, FG, FO, FS, FP, FT>,
}: GreedyConfig<D, FV, FA, FL, FG, FO, FS, FP, FT>,
) -> Box<SuspendedMesh<'a>>
where
FV: for<'r> FnMut(&'r mut D, Vec3<i32>) -> V + 'a,
FA: for<'r> FnMut(&'r mut D, Vec3<i32>) -> f32 + 'a,
FL: for<'r> FnMut(&'r mut D, Vec3<i32>) -> f32 + 'a,
FG: for<'r> FnMut(&'r mut D, Vec3<i32>) -> f32 + 'a,
FO: for<'r> FnMut(&'r mut D, Vec3<i32>) -> bool + 'a,
FS: for<'r> FnMut(&'r mut D, Vec3<i32>, Vec3<i32>, Vec2<Vec3<i32>>) -> Option<(bool, M)>,
FS: for<'r> FnMut(&'r mut D, Vec3<i32>, V, V, /*Vec3<i32>, */Vec2<Vec3<i32>>) -> Option<(bool, M)>,
FP: FnMut(Vec2<u16>, Vec2<Vec2<u16>>, Vec3<f32>, Vec2<Vec3<f32>>, Vec3<f32>, &M),
FT: for<'r> FnMut(&'r mut D, Vec3<i32>, u8, u8, bool) -> [u8; 4] + 'a,
{
@ -451,18 +473,25 @@ where
// x (u = y, v = z)
greedy_mesh_cross_section(
Vec3::new(greedy_size.y, greedy_size.z, greedy_size_cross.x),
|pos| {
Vec3::new(greedy_size.z, greedy_size.y, greedy_size_cross.x),
#[inline(always)] |pos| {
let pos = draw_delta + Vec3::new(pos.z, pos.y, pos.x);
let delta = Vec3::unit_x();
let from = get_vox(&mut data, pos - delta);
let to = get_vox(&mut data, pos);
should_draw(
&mut data,
draw_delta + Vec3::new(pos.z, pos.x, pos.y),
Vec3::unit_x(),
Vec2::new(Vec3::unit_y(), Vec3::unit_z()),
/* draw_delta + Vec3::new(pos.z, pos.x, pos.y),
Vec3::unit_x(), */
pos,
from,
to,
Vec2::new(Vec3::unit_z(), Vec3::unit_y()),
)
},
|pos, dim, &(faces_forward, ref meta)| {
let pos = Vec3::new(pos.z, pos.x, pos.y);
let uv = Vec2::new(Vec3::unit_y(), Vec3::unit_z());
let pos = Vec3::new(pos.z, pos.y, pos.x);
let uv = Vec2::new(Vec3::unit_z(), Vec3::unit_y());
let norm = Vec3::unit_x();
let atlas_pos = add_to_atlas(
atlas,
@ -479,8 +508,8 @@ where
pos,
dim,
uv,
norm,
faces_forward,
-norm,
!faces_forward,
meta,
atlas_pos,
|atlas_pos, dim, pos, draw_dim, norm, meta| {
@ -490,20 +519,27 @@ where
},
);
// y (u = z, v = x)
// y (u = x, v = z)
greedy_mesh_cross_section(
Vec3::new(greedy_size.z, greedy_size.x, greedy_size_cross.y),
|pos| {
Vec3::new(greedy_size.x, greedy_size.z, greedy_size_cross.y),
#[inline(always)] |pos| {
let pos = draw_delta + Vec3::new(pos.x, pos.z, pos.y);
let delta = Vec3::unit_y();
let from = get_vox(&mut data, pos - delta);
let to = get_vox(&mut data, pos);
should_draw(
&mut data,
draw_delta + Vec3::new(pos.y, pos.z, pos.x),
Vec3::unit_y(),
Vec2::new(Vec3::unit_z(), Vec3::unit_x()),
/* draw_delta + Vec3::new(pos.y, pos.z, pos.x),
Vec3::unit_y(), */
pos,
from,
to,
Vec2::new(Vec3::unit_x(), Vec3::unit_z()),
)
},
|pos, dim, &(faces_forward, ref meta)| {
let pos = Vec3::new(pos.y, pos.z, pos.x);
let uv = Vec2::new(Vec3::unit_z(), Vec3::unit_x());
let pos = Vec3::new(pos.x, pos.z, pos.y);
let uv = Vec2::new(Vec3::unit_x(), Vec3::unit_z());
let norm = Vec3::unit_y();
let atlas_pos = add_to_atlas(
atlas,
@ -520,8 +556,8 @@ where
pos,
dim,
uv,
norm,
faces_forward,
-norm,
!faces_forward,
meta,
atlas_pos,
|atlas_pos, dim, pos, draw_dim, norm, meta| {
@ -534,11 +570,18 @@ where
// z (u = x, v = y)
greedy_mesh_cross_section(
Vec3::new(greedy_size.x, greedy_size.y, greedy_size_cross.z),
|pos| {
#[inline(always)] |pos| {
let pos = draw_delta + Vec3::new(pos.x, pos.y, pos.z);
let delta = Vec3::unit_z();
let from = get_vox(&mut data, pos - delta);
let to = get_vox(&mut data, pos);
should_draw(
&mut data,
draw_delta + Vec3::new(pos.x, pos.y, pos.z),
Vec3::unit_z(),
/* draw_delta + Vec3::new(pos.x, pos.y, pos.z),
Vec3::unit_z(), */
pos,
from,
to,
Vec2::new(Vec3::unit_x(), Vec3::unit_y()),
)
},
@ -604,12 +647,10 @@ fn greedy_mesh_cross_section<M: PartialEq>(
let mut mask = &mut mask[0..dims.y * dims.x];
(0..dims.z + 1).for_each(|d| {
// Compute mask
let mut posi = 0;
(0..dims.y).for_each(|j| {
(0..dims.x).for_each(|i| {
mask.chunks_exact_mut(dims.x).enumerate().for_each(|(j, mask)| {
mask.iter_mut().enumerate().for_each(|(i, mask)| {
// NOTE: Safe because dims.z actually fits in a u16.
mask[posi] = draw_face(Vec3::new(i as i32, j as i32, d as i32));
posi += 1;
*mask = draw_face(Vec3::new(i as i32, j as i32, d as i32));
});
});
/* mask.iter_mut().enumerate().for_each(|(posi, mask)| {
@ -620,12 +661,13 @@ fn greedy_mesh_cross_section<M: PartialEq>(
}); */
(0..dims.y).for_each(|j| {
let mask = &mut mask[j * dims.x..];
let mut i = 0;
while i < dims.x {
// Compute width (number of set x bits for this row and layer, starting at the
// current minimum column).
if let Some(ori) = &mask[j * dims.x + i] {
let width = 1 + mask[j * dims.x + i + 1..j * dims.x + dims.x]
if let Some(ori) = &mask[i] {
let width = 1 + mask[i + 1..dims.x]
.iter()
.take_while(move |&mask| mask.as_ref() == Some(ori))
.count();
@ -633,19 +675,20 @@ fn greedy_mesh_cross_section<M: PartialEq>(
// Compute height (number of rows having w set x bits for this layer, starting
// at the current minimum column and row).
let height = 1
+ (j + 1..dims.y)
.take_while(|h| {
mask[h * dims.x + i..h * dims.x + max_x]
+ mask[dims.x..(dims.y - j) * dims.x/* + max_x + i*/]
.chunks_exact(dims.x)
.take_while(|mask| {
mask[i..max_x]
.iter()
.all(|mask| mask.as_ref() == Some(ori))
})
.count();
let max_y = j + height;
let max_y = height;
// Add quad.
push_quads(Vec3::new(i, j, d), Vec2::new(width, height), ori);
// Unset mask bits in drawn region, so we don't try to re-draw them.
(j..max_y).for_each(|l| {
mask[l * dims.x + i..l * dims.x + max_x]
mask[..max_y * dims.x].chunks_exact_mut(dims.x).for_each(|mask| {
mask[i..max_x]
.iter_mut()
.for_each(|mask| {
*mask = None;
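For reference, a standalone sketch of the per-layer greedy merge that the loop above performs over a row-major mask (w and h are the layer dimensions; illustrative code, not the diff's exact implementation):

fn greedy_rects<M: PartialEq + Copy>(
    mask: &mut [Option<M>],
    w: usize,
    h: usize,
    mut push_rect: impl FnMut(usize, usize, usize, usize, M),
) {
    for j in 0..h {
        let mut i = 0;
        while i < w {
            if let Some(m) = mask[j * w + i] {
                // Grow rightwards while the metadata matches.
                let width = 1 + mask[j * w + i + 1..j * w + w]
                    .iter()
                    .take_while(|&&c| c == Some(m))
                    .count();
                // Grow downwards while every cell in the candidate row matches.
                let height = 1 + (j + 1..h)
                    .take_while(|&r| {
                        mask[r * w + i..r * w + i + width].iter().all(|&c| c == Some(m))
                    })
                    .count();
                push_rect(i, j, width, height, m);
                // Clear the merged cells so they are not emitted again.
                for r in j..j + height {
                    mask[r * w + i..r * w + i + width]
                        .iter_mut()
                        .for_each(|c| *c = None);
                }
                i += width;
            } else {
                i += 1;
            }
        }
    }
}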

View File

@ -62,12 +62,12 @@ where
};
let get_glow = |_vol: &mut V, _pos: Vec3<i32>| 0.0;
let get_opacity = |vol: &mut V, pos: Vec3<i32>| vol.get(pos).map_or(true, |vox| vox.is_empty());
let should_draw = |vol: &mut V, pos: Vec3<i32>, delta: Vec3<i32>, uv| {
should_draw_greedy(pos, delta, uv, |vox| {
let should_draw = |vol: &mut V, pos: Vec3<i32>, from: Cell, to: Cell, /*delta: Vec3<i32>, */uv| {
should_draw_greedy(pos, from, to,/* delta, */uv/*, |vox| {
vol.get(vox)
.map(|vox| *vox)
.unwrap_or_else(|_| Cell::empty())
})
} */)
};
let create_opaque = |atlas_pos, pos, norm| {
TerrainVertex::new_figure(atlas_pos, (pos + offs) * scale, norm, bone_idx)
@ -78,6 +78,11 @@ where
draw_delta,
greedy_size,
greedy_size_cross,
get_vox: |vol: &mut V, vox| {
vol.get(vox)
.map(|vox| *vox)
.unwrap_or_else(|_| Cell::empty())
},
get_ao: |_: &mut V, _: Vec3<i32>| 1.0,
get_light,
get_glow,
@ -204,8 +209,8 @@ where
flat_get(flat, pos).get_color().unwrap_or_else(Rgb::zero)
};
let get_opacity = move |flat: &mut _, pos: Vec3<i32>| flat_get(flat, pos).is_empty();
let should_draw = move |flat: &mut _, pos: Vec3<i32>, delta: Vec3<i32>, uv| {
should_draw_greedy_ao(vertical_stripes, pos, delta, uv, |vox| flat_get(flat, vox))
let should_draw = move |flat: &mut _, pos: Vec3<i32>, from: Cell, to: Cell, /*delta: Vec3<i32>, */uv| {
should_draw_greedy_ao(vertical_stripes, pos, from, to,/* delta, */uv/* , |vox| flat_get(flat, vox) */)
};
// NOTE: Fits in i16 (much lower actually) so f32 is no problem (and the final
// position, pos + mesh_delta, is guaranteed to fit in an f32).
@ -219,6 +224,7 @@ where
draw_delta,
greedy_size,
greedy_size_cross,
get_vox: move |flat: &mut _, vox| flat_get(flat, vox),
get_ao: |_: &mut _, _: Vec3<i32>| 1.0,
get_light,
get_glow,
@ -292,12 +298,12 @@ where
.unwrap_or_else(Rgb::zero)
};
let get_opacity = |vol: &mut V, pos: Vec3<i32>| vol.get(pos).map_or(true, |vox| vox.is_empty());
let should_draw = |vol: &mut V, pos: Vec3<i32>, delta: Vec3<i32>, uv| {
should_draw_greedy(pos, delta, uv, |vox| {
let should_draw = |vol: &mut V, pos: Vec3<i32>, from: Cell, to: Cell, /*delta: Vec3<i32>, */uv| {
should_draw_greedy(pos, from, to,/* delta, */uv/*, |vox| {
vol.get(vox)
.map(|vox| *vox)
.unwrap_or_else(|_| Cell::empty())
})
}*/)
};
let create_opaque = |_atlas_pos, pos: Vec3<f32>, norm| ParticleVertex::new(pos, norm);
@ -307,6 +313,11 @@ where
draw_delta,
greedy_size,
greedy_size_cross,
get_vox: |vol: &mut V, vox| {
vol.get(vox)
.map(|vox| *vox)
.unwrap_or_else(|_| Cell::empty())
},
get_ao: |_: &mut V, _: Vec3<i32>| 1.0,
get_light,
get_glow,
@ -333,12 +344,14 @@ where
fn should_draw_greedy(
pos: Vec3<i32>,
delta: Vec3<i32>,
from: Cell,
to: Cell,
/* delta: Vec3<i32>, */
_uv: Vec2<Vec3<i32>>,
flat_get: impl Fn(Vec3<i32>) -> Cell,
/* flat_get: impl Fn(Vec3<i32>) -> Cell, */
) -> Option<(bool, /* u8 */ ())> {
let from = flat_get(pos - delta);
let to = flat_get(pos);
/* let from = flat_get(pos - delta);
let to = flat_get(pos); */
let from_opaque = !from.is_empty();
if from_opaque != to.is_empty() {
None
@ -352,12 +365,14 @@ fn should_draw_greedy(
fn should_draw_greedy_ao(
vertical_stripes: bool,
pos: Vec3<i32>,
delta: Vec3<i32>,
from: Cell,
to: Cell,
/* delta: Vec3<i32>, */
_uv: Vec2<Vec3<i32>>,
flat_get: impl Fn(Vec3<i32>) -> Cell,
/* flat_get: impl Fn(Vec3<i32>) -> Cell, */
) -> Option<(bool, bool)> {
let from = flat_get(pos - delta);
let to = flat_get(pos);
/* let from = flat_get(pos - delta);
let to = flat_get(pos); */
let from_opaque = !from.is_empty();
if from_opaque != to.is_empty() {
None

View File

@ -9,7 +9,7 @@ use crate::{
scene::terrain::BlocksOfInterest,
};
use common::{
terrain::Block,
terrain::{Block, TerrainChunk},
util::either_with,
vol::{ReadVol, RectRasterableVol},
volumes::vol_grid_2d::{CachedVolGrid2d, VolGrid2d},
@ -39,18 +39,33 @@ pub const MAX_LIGHT_DIST: i32 = SUNLIGHT as i32;
type CalcLightFn<V, I> = impl Fn(Vec3<i32>) -> f32 + 'static + Send + Sync;
#[inline(always)]
/* #[allow(unsafe_code)] */
fn flat_get<'a>(flat: &'a Vec<Block>, w: i32, h: i32, d: i32) -> impl Fn(Vec3<i32>) -> Block + 'a {
let hd = h * d;
let flat = &flat[0..(w * hd) as usize];
let wh = w * h;
let flat = &flat[0..(d * wh) as usize];
#[inline(always)] move |Vec3 { x, y, z }| {
// z can range from -1..range.size().d + 1
let z = z + 1;
flat[(x * hd + y * d + z) as usize]
flat[((z * wh + y * w + x) as usize)]
/* unsafe { *flat.get_unchecked((z * wh + y * w + x) as usize) } */
/* match flat.get((x * hd + y * d + z) as usize).copied() {
Some(b) => b,
None => panic!("x {} y {} z {} d {} h {}", x, y, z, d, h),
} */
}
/* let hd = h * d;
let flat = &flat[0..(w * hd) as usize];
#[inline(always)] move |Vec3 { x, y, z }| {
// z can range from -1..range.size().d + 1
let z = z + 1;
/* flat[((x * hd + y * d + z) as usize)] */
unsafe { *flat.get_unchecked((x * hd + y * d + z) as usize) }
/* match flat.get((x * hd + y * d + z) as usize).copied() {
Some(b) => b,
None => panic!("x {} y {} z {} d {} h {}", x, y, z, d, h),
} */
} */
}
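The index change above swaps the old x-major flattening for a z-major one, so each whole horizontal slice of the volume becomes contiguous in memory (the per-slice copying and row-kind bookkeeping later in this commit rely on that). A sketch of the two layouts, with illustrative helper names:

// Old layout: all z values of a given (x, y) column are adjacent.
fn idx_x_major(x: usize, y: usize, z: usize, h: usize, d: usize) -> usize {
    x * h * d + y * d + z
}

// New layout: all (x, y) cells of a given z slice are adjacent, so a slice occupies
// the contiguous range z * w * h .. (z + 1) * w * h.
fn idx_z_major(x: usize, y: usize, z: usize, w: usize, h: usize) -> usize {
    z * w * h + y * w + x
}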
fn calc_light<'a,
@ -303,7 +318,7 @@ fn calc_light<'a,
.copied()
.unwrap_or(default_light);
if l != OPAQUE && l != UNKNOWN {
if /* l != OPAQUE && */l != UNKNOWN {
l as f32 * SUNLIGHT_INV
} else {
0.0
@ -311,9 +326,11 @@ fn calc_light<'a,
}
}
type V = TerrainChunk;
#[allow(clippy::type_complexity)]
#[inline(always)]
pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + 'static>(
pub fn generate_mesh<'a/*, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + 'static*/>(
vol: &'a VolGrid2d<V>,
(range, max_texture_size, boi): (Aabb<i32>, Vec2<u16>, &'a BlocksOfInterest),
) -> MeshGen<
@ -340,17 +357,38 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
let (w, h, d) = range.size().into_tuple();
// z can range from -1..range.size().d + 1
let d = d + 2;
{
/// Representative block for air.
const AIR: Block = Block::air(common::terrain::sprite::SpriteKind::Empty);
/// Representative block for liquid.
///
/// FIXME: Can you really skip meshing for general liquids? Probably not...
const LIQUID: Block = Block::water(common::terrain::sprite::SpriteKind::Empty);
/// Representative block for solids.
///
/// FIXME: Really hacky!
const OPAQUE: Block = Block::lava(common::terrain::sprite::SpriteKind::Empty);
const ALL_OPAQUE: u8 = 0b1;
const ALL_LIQUID: u8 = 0b10;
const ALL_AIR: u8 = 0b100;
// For each horizontal slice of the chunk, we keep track of what kinds of blocks are in it.
// This allows us to compute limits after the fact, much more precisely than keeping track of a
// single intersection would; it also lets us skip homogeneous slices entirely.
let mut row_kinds = vec![0; d as usize];
/* {
span!(_guard, "copy to flat array");
let hd = h * d;
/*let flat = */{
let mut arena = bumpalo::Bump::new();
/* let mut volume = vol.cached(); */
const AIR: Block = Block::air(common::terrain::sprite::SpriteKind::Empty);
// TODO: Once we can manage it sensibly, consider using something like
// Option<Block> instead of just assuming air.
/*let mut */flat = vec![AIR; (w * /*h * d*/hd) as usize]
/* Vec::with_capacity((w * /*h * d*/hd) as usize) */
;
let row_kinds = &mut row_kinds[0..d as usize];
let flat = &mut flat/*.spare_capacity_mut()*/[0..(w * hd) as usize];
/* /*volume*/vol.iter().for_each(|(chunk_key, chunk)| {
let corner = chunk.key_pos(chunk_key);
@ -368,6 +406,8 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
/* vol.iter().for_each(|(key, chonk)| { */
let chonk = &*chonk;
let pos = vol.key_pos(key);
/* // Avoid diagonals.
if pos.x != range.min.x + 1 && pos.y != range.min.y + 1 { return; } */
// Calculate intersection of Aabb and this chunk
// TODO: should we do this more implicitly as part of the loop
// TODO: this probably has to be computed in the chunk.for_each_in() as well
@ -390,6 +430,9 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
min: VolGrid2d::<V>::chunk_offs(intersection_.min) + Vec3::new(0, 0, z_diff),
max: VolGrid2d::<V>::chunk_offs(intersection_.max) + Vec3::new(1, 1, z_diff + 1),
};
let z_diff = z_diff + chonk.get_min_z();
let z_max = chonk.get_max_z() - chonk.get_min_z();
let below = *chonk.below();
/* [[0 ..1]; [0 ..1]; [0..d]]
[[0 ..1]; [1 ..h-1]; [0..d]]
@ -402,8 +445,46 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
[1,1; d] */
let flat_chunk = chonk.make_flat(/*&stone_slice, &air_slice, */&arena);
let mut i = (x_diff * hd + y_diff * d) as usize;
let hd_ = (intersection.size().h * d) as usize;
let min_z_ = z_diff - intersection.min.z;
let max_z_ = z_max + z_diff - intersection.min.z;
let row_fill = if below.is_opaque() {
/* opaque_limits = opaque_limits
.map(|l| l.including(z_diff))
.or_else(|| Some(Limits::from_value(z_diff))); */
ALL_OPAQUE
} else if below.is_liquid() {
/* fluid_limits = fluid_limits
.map(|l| l.including(z_diff))
.or_else(|| Some(Limits::from_value(z_diff))); */
ALL_LIQUID
} else {
/* // Assume air
air_limits = air_limits
.map(|l| l.including(z_diff))
.or_else(|| Some(Limits::from_value(z_diff))); */
ALL_AIR
};
let skip_count = min_z_.max(0);
let take_count = (max_z_.min(d) - skip_count).max(0);
let skip_count = skip_count as usize;
let take_count = take_count as usize;
// Fill the bottom rows with their below type.
row_kinds.iter_mut().take(skip_count).for_each(|row| {
*row |= row_fill;
});
// Fill the top rows with air (we just assume that's the above type, since it
// always is in practice).
row_kinds.iter_mut().skip(skip_count + take_count).for_each(|row| {
*row |= ALL_AIR;
});
// dbg!(pos, intersection_, intersection, range, flat_range, x_diff, y_diff, z_diff, y_rem, x_off, i);
(intersection.min.x..intersection.max.x).for_each(|x| {
let flat = &mut flat[i..i + /*intersection.size().y * intersection.size().z*/hd_];
@ -414,7 +495,14 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
/* chonk.for_each_in(intersection, |pos_offset, block| {
pos_offset.z += z_diff;
}); */
flat.into_iter().enumerate().for_each(|(z, flat)| {
// intersection.min.z = range.min.z - 1 - range.min.z = -1
// z_diff = chonk.get_min_z() - range.min.z
// min_z_ = chonk.get_min_z() - (range.min.z - 1)
//
// max_z_ = (chonk.get_max_z() - (range.min.z - 1)).min(d - skip_count)
flat[0..skip_count].fill(below);
flat.into_iter().zip(row_kinds.into_iter()).enumerate().skip(skip_count).take(take_count).for_each(|(z, (flat, row))| {
let z = z as i32 + intersection.min.z;
/* (intersection.min.z..intersection.max.z).for_each(|z| { */
/* let mut i = ((x_diff + (x - intersection.min.x)) * hd + (y_diff + (y - intersection.min.y)) * d + (z - intersection.min.z)) as usize; */
@ -432,21 +520,34 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
// since it's not clear this will work properly with liquid.
.unwrap_or(AIR); */
/* if let Ok(&block) = chonk.get(Vec3::new(x, y, z - z_diff)) */
let block = chonk.get(Vec3::new(x, y, z - z_diff)).copied().unwrap_or(AIR);
let block_pos = Vec3::new(x, y, z - z_diff);
let block = /* if block_pos.z < 0 {
*chonk.below()
} else if block_pos.z >= z_max {
*chonk.above()
} else */{
let grp_id = common::terrain::TerrainSubChunk::grp_idx(block_pos) as usize;
let rel_id = common::terrain::TerrainSubChunk::rel_idx(block_pos) as usize;
flat_chunk[grp_id][rel_id]
};
/* let block = chonk.get(block_pos).copied().unwrap_or(AIR); */
{
if block.is_opaque() {
opaque_limits = opaque_limits
*row |= if block.is_opaque() {
/* opaque_limits = opaque_limits
.map(|l| l.including(z))
.or_else(|| Some(Limits::from_value(z)));
.or_else(|| Some(Limits::from_value(z))); */
ALL_OPAQUE
} else if block.is_liquid() {
fluid_limits = fluid_limits
/* fluid_limits = fluid_limits
.map(|l| l.including(z))
.or_else(|| Some(Limits::from_value(z)));
.or_else(|| Some(Limits::from_value(z))); */
ALL_LIQUID
} else {
// Assume air
air_limits = air_limits
/* air_limits = air_limits
.map(|l| l.including(z))
.or_else(|| Some(Limits::from_value(z)));
.or_else(|| Some(Limits::from_value(z))); */
ALL_AIR
};
/*flat[i] = block*//*unsafe { flat.get_unchecked_mut(i) }*//*flat[i].write(block);*/
/* flat.write(block); */
@ -466,9 +567,188 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
// i += x_off;
i += hd as usize;
});
arena.reset();
/* }); */
});
});
// Compute limits (TODO: see if we can skip this, or make it more precise?).
row_kinds.iter().enumerate().for_each(|(z, row)| {
let z = z as i32 /* + intersection.min.z */- 1;
if row & ALL_OPAQUE != 0 {
opaque_limits = opaque_limits
.map(|l| l.including(z))
.or_else(|| Some(Limits::from_value(z)));
}
if row & ALL_LIQUID != 0 {
fluid_limits = fluid_limits
.map(|l| l.including(z))
.or_else(|| Some(Limits::from_value(z)));
}
if row & ALL_AIR != 0 {
air_limits = air_limits
.map(|l| l.including(z))
.or_else(|| Some(Limits::from_value(z)));
}
});
}
/* unsafe { flat.set_len((w * hd) as usize); } */
} */
{
span!(_guard, "copy to flat array");
let wh = w * h;
{
let mut arena = bumpalo::Bump::new();
flat = vec![AIR; (d * wh) as usize];
let row_kinds = &mut row_kinds[0..d as usize];
let flat = &mut flat[0..(d * wh) as usize];
let flat_range = Aabb {
min: range.min - Vec3::new(0, 0, 1),
max: range.max - Vec3::new(1, 1, 0),
};
let min_chunk_key = vol.pos_key(flat_range.min);
let max_chunk_key = vol.pos_key(flat_range.max);
(min_chunk_key.x..max_chunk_key.x + 1).for_each(|key_x| {
(min_chunk_key.y..max_chunk_key.y + 1).for_each(|key_y| {
let key = Vec2::new(key_x, key_y);
let chonk = vol.get_key(key).expect("All keys in range must have chonks.");
let chonk = &*chonk;
let pos = vol.key_pos(key);
let intersection_ = flat_range.intersection(Aabb {
min: pos.with_z(i32::MIN),
max: (pos + VolGrid2d::<V>::chunk_size().map(|e| e as i32) - 1).with_z(i32::MAX),
});
// Map intersection into chunk coordinates
let x_diff = intersection_.min.x - flat_range.min.x;
let y_diff = intersection_.min.y - flat_range.min.y;
let z_diff = -range.min.z;
/* let y_rem = flat_range.max.y - intersection_.max.y;
let x_off = ((y_diff + y_rem) * d) as usize; */
let intersection = Aabb {
min: VolGrid2d::<V>::chunk_offs(intersection_.min) + Vec3::new(0, 0, z_diff),
max: VolGrid2d::<V>::chunk_offs(intersection_.max) + Vec3::new(1, 1, z_diff + 1),
};
let z_diff = z_diff + chonk.get_min_z();
let z_max = chonk.get_max_z() - chonk.get_min_z();
let below = *chonk.below();
let flat_chunk = chonk.make_flat(&arena);
let min_z_ = z_diff - intersection.min.z;
let max_z_ = z_max + z_diff - intersection.min.z;
let row_fill = if below.is_opaque() {
ALL_OPAQUE
} else if below.is_liquid() {
ALL_LIQUID
} else {
ALL_AIR
};
let skip_count = min_z_.max(0);
let take_count = (max_z_.min(d) - skip_count).max(0);
let skip_count = skip_count as usize;
let take_count = take_count as usize;
row_kinds.iter_mut().take(skip_count).for_each(|row| {
*row |= row_fill;
});
row_kinds.iter_mut().skip(skip_count + take_count).for_each(|row| {
*row |= ALL_AIR;
});
// dbg!(pos, intersection_, intersection, range, flat_range, x_diff, y_diff, z_diff, y_rem, x_off, i);
flat.chunks_exact_mut(wh as usize).take(skip_count).for_each(|flat| {
flat.chunks_exact_mut(w as usize).skip(y_diff as usize).take((intersection.max.y - intersection.min.y) as usize).for_each(|flat| {
flat.into_iter().skip(x_diff as usize).take((intersection.max.x - intersection.min.x) as usize).for_each(|flat| {
*flat = below;
});
});
});
flat.chunks_exact_mut(wh as usize).zip(row_kinds.into_iter()).enumerate().skip(skip_count).take(take_count).for_each(|(z, (flat, row_))| {
let mut row = *row_;
let z = z as i32 + intersection.min.z - z_diff;
flat.chunks_exact_mut(w as usize).skip(y_diff as usize).enumerate().take((intersection.max.y - intersection.min.y) as usize).for_each(|(y, flat)| {
let y = y as i32 + intersection.min.y;
flat.into_iter().skip(x_diff as usize).enumerate().take((intersection.max.x - intersection.min.x) as usize).for_each(|(x, flat)| {
let x = x as i32 + intersection.min.x;
let block_pos = Vec3::new(x, y, z);
let block = {
let grp_id = common::terrain::TerrainSubChunk::grp_idx(block_pos) as usize;
let rel_id = common::terrain::TerrainSubChunk::rel_idx(block_pos) as usize;
flat_chunk[grp_id][rel_id]
};
{
row |= if block.is_opaque() {
ALL_OPAQUE
} else if block.is_liquid() {
ALL_LIQUID
} else {
ALL_AIR
};
*flat = block;
}
});
});
*row_ = row;
});
/* (intersection.min.z..intersection.max.z).for_each(|z| {
let flat = &mut flat[i..i + /*intersection.size().y * intersection.size().z*/hd_];
flat.chunks_exact_mut(d as usize).enumerate().for_each(|(y, flat)| {
let y = y as i32 + intersection.min.y;
flat[0..skip_count].fill(below);
flat.into_iter().zip(row_kinds.into_iter()).enumerate().skip(skip_count).take(take_count).for_each(|(z, (flat, row))| {
let z = z as i32 + intersection.min.z;
/* let mut i = ((x_diff + (x - intersection.min.x)) * hd + (y_diff + (y - intersection.min.y)) * d + (z - intersection.min.z)) as usize; */
let block_pos = Vec3::new(x, y, z - z_diff);
let block = {
let grp_id = common::terrain::TerrainSubChunk::grp_idx(block_pos) as usize;
let rel_id = common::terrain::TerrainSubChunk::rel_idx(block_pos) as usize;
flat_chunk[grp_id][rel_id]
};
{
*row |= if block.is_opaque() {
ALL_OPAQUE
} else if block.is_liquid() {
ALL_LIQUID
} else {
ALL_AIR
};
*flat = block;
}
});
});
i += hd as usize;
}); */
arena.reset();
});
});
// Compute limits (TODO: see if we can skip this, or make it more precise?).
row_kinds.iter().enumerate().for_each(|(z, row)| {
let z = z as i32 /* + intersection.min.z */- 1;
if row & ALL_OPAQUE != 0 {
opaque_limits = opaque_limits
.map(|l| l.including(z))
.or_else(|| Some(Limits::from_value(z)));
}
if row & ALL_LIQUID != 0 {
fluid_limits = fluid_limits
.map(|l| l.including(z))
.or_else(|| Some(Limits::from_value(z)));
}
if row & ALL_AIR != 0 {
air_limits = air_limits
.map(|l| l.including(z))
.or_else(|| Some(Limits::from_value(z)));
}
});
}
/* unsafe { flat.set_len((w * hd) as usize); } */
}
@ -479,12 +759,12 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
let (z_start, z_end) = match (air_limits, fluid_limits, opaque_limits) {
(Some(air), Some(fluid), Some(opaque)) => {
let air_fluid = air.intersection(fluid);
if let Some(intersection) = air_fluid.filter(|limits| limits.min + 1 == limits.max) {
/* if let Some(intersection) = air_fluid.filter(|limits| limits.min + 1 == limits.max) {
// If there is a planar air-fluid boundary, just draw it directly and avoid
// redundantly meshing the whole fluid volume, then intersect the ground-fluid
// and ground-air meshes to make sure we don't miss anything.
either_with(air.intersection(opaque), fluid.intersection(opaque), Limits::union)
} else {
} else */{
// Otherwise, do a normal three-way intersection.
air.three_way_intersection(fluid, opaque)
}
@ -564,22 +844,10 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
let max_size = max_texture_size;
assert!(z_end >= z_start);
let greedy_size = Vec3::new(range.size().w - 2, range.size().h - 2, z_end - z_start + 1);
// NOTE: Terrain sizes are limited to 32 x 32 x 16384 (to fit in 24 bits: 5 + 5
// + 14). FIXME: Make this function fallible, since the terrain
// information might be dynamically generated which would make this hard
// to enforce.
assert!(greedy_size.x <= 32 && greedy_size.y <= 32 && greedy_size.z <= 16384);
// NOTE: Cast is safe by prior assertion on greedy_size; it fits into a u16,
// which always fits into a f32.
let max_bounds: Vec3<f32> = greedy_size.as_::<f32>();
// NOTE: Cast is safe by prior assertion on greedy_size; it fits into a u16,
// which always fits into a usize.
let greedy_size = greedy_size.as_::<usize>();
let greedy_size_cross = Vec3::new(greedy_size.x - 1, greedy_size.y - 1, greedy_size.z);
let draw_delta = Vec3::new(1, 1, z_start);
let flat_get = flat_get(&flat, w, h, d);
let get_color =
#[inline(always)] |_: &mut (), pos: Vec3<i32>| flat_get(pos).get_color().unwrap_or_else(Rgb::zero);
let get_light = #[inline(always)] |_: &mut (), pos: Vec3<i32>| {
if flat_get(pos).is_opaque() {
0.0
@ -591,25 +859,44 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
if flat_get(pos).is_opaque() { 0.0 } else { 1.0 }
};
let get_glow = #[inline(always)] |_: &mut (), pos: Vec3<i32>| glow(pos + range.min);
let get_color =
#[inline(always)] |_: &mut (), pos: Vec3<i32>| flat_get(pos).get_color().unwrap_or_else(Rgb::zero);
let get_opacity = #[inline(always)] |_: &mut (), pos: Vec3<i32>| !flat_get(pos).is_opaque();
let should_draw = #[inline(always)] |_: &mut (), pos: Vec3<i32>, delta: Vec3<i32>, _uv| {
should_draw_greedy(pos, delta, #[inline(always)] |pos| flat_get(pos))
let should_draw = #[inline(always)] |_: &mut (), /*pos*/_: Vec3<i32>, from: Block, to: Block,/* delta: Vec3<i32>,*/ _uv: Vec2<Vec3<i32>>| {
should_draw_greedy(/*pos, */from, to/*, delta, #[inline(always)] |pos| flat_get(pos) */)
};
let mut greedy =
GreedyMesh::<guillotiere::SimpleAtlasAllocator>::new(max_size, greedy::terrain_config());
let greedy_size = Vec3::new(range.size().w - 2, range.size().h - 2, z_end - z_start + 1);
let mesh_delta = Vec3::new(0.0, 0.0, (z_start + range.min.z) as f32);
let max_bounds: Vec3<f32> = greedy_size.as_::<f32>();
let mut do_draw_greedy = #[inline(always)] |z_start: i32, z_end: i32| {
// dbg!(range.min, z_start, z_end);
let greedy_size = Vec3::new(range.size().w - 2, range.size().h - 2, z_end - z_start + 1);
// NOTE: Terrain sizes are limited to 32 x 32 x 16384 (to fit in 24 bits: 5 + 5
// + 14). FIXME: Make this function fallible, since the terrain
// information might be dynamically generated which would make this hard
// to enforce.
assert!(greedy_size.x <= 32 && greedy_size.y <= 32 && greedy_size.z <= 16384);
// NOTE: Cast is safe by prior assertion on greedy_size; it fits into a u16,
// which always fits into a f32.
// NOTE: Cast is safe by prior assertion on greedy_size; it fits into a u16,
// which always fits into a usize.
let greedy_size = greedy_size.as_::<usize>();
let greedy_size_cross = Vec3::new(greedy_size.x - 1, greedy_size.y - 1, greedy_size.z);
let draw_delta = Vec3::new(1, 1, z_start);
// NOTE: Conversion to f32 is fine since this i32 is actually in bounds for u16.
let mesh_delta = Vec3::new(0.0, 0.0, (z_start + range.min.z) as f32);
let create_opaque =
#[inline(always)] |atlas_pos, pos, norm, meta| TerrainVertex::new(atlas_pos, pos + mesh_delta, norm, meta);
let create_transparent = #[inline(always)] |_atlas_pos, pos, norm| FluidVertex::new(pos + mesh_delta, norm);
#[inline(always)] |atlas_pos, pos: Vec3<f32>, norm, meta| TerrainVertex::new(atlas_pos, pos + mesh_delta, norm, meta);
let create_transparent = #[inline(always)] |_atlas_pos: Vec2<u16>, pos: Vec3<f32>, norm: Vec3<f32>| FluidVertex::new(pos + mesh_delta, norm);
let mut greedy =
GreedyMesh::<guillotiere::SimpleAtlasAllocator>::new(max_size, greedy::general_config());
greedy.push(GreedyConfig {
data: (),
draw_delta,
greedy_size,
greedy_size_cross,
get_vox: #[inline(always)] |_: &mut (), pos| flat_get(pos),
get_ao,
get_light,
get_glow,
@ -643,6 +930,66 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
TerrainVertex::make_col_light(light, glow, get_color(data, pos), ao)
},
});
};
let mut z_start = z_start;
let mut row_iter = row_kinds.iter().enumerate();
let mut needs_draw = false;
row_kinds.array_windows().enumerate().skip(z_start as usize).take((z_end - z_start + 1) as usize).for_each(|(z, &[from_row, to_row])| {
let z = z as i32;
// Evaluate a "canonicalized" greedy mesh algorithm on this pair of row kinds, to see if we're
// about to switch (requiring us to draw a surface).
let from = match from_row {
ALL_AIR => Some(AIR),
ALL_LIQUID => Some(LIQUID),
ALL_OPAQUE => Some(OPAQUE),
_ => None,
};
let to = match to_row {
ALL_AIR => Some(AIR),
ALL_LIQUID => Some(LIQUID),
ALL_OPAQUE => Some(OPAQUE),
_ => None,
};
// There are two distinct cases:
let (from, to) = match from.zip(to) {
None => {
// At least one of the two rows is not homogeneous.
if !needs_draw {
// The from is homogeneous (since !needs_draw), but the to is not. We should
// start a new draw without drawing the old volume.
z_start = z;
needs_draw = true;
}
// Otherwise, we were in the middle of drawing the previous row, so we just extend
// the current draw.
return;
},
Some(pair) => pair,
};
let old_needs_draw = needs_draw;
// The from *and* to are both homogeneous, so we can compute whether we should draw
// a surface between them.
needs_draw = should_draw_greedy(from, to).is_some();
if needs_draw == old_needs_draw {
// We continue the current draw (or nondraw).
return;
}
if old_needs_draw {
// old_needs_draw is true and needs_draw is now false, so we end the earlier
// draw here, emitting the existing volume, before starting a fresh one below.
do_draw_greedy(z_start, z - 1);
}
// We always must start a fresh draw.
z_start = z;
});
// Finally, draw any remaining terrain, if necessary.
if needs_draw {
/* if z_start != z_end {
dbg!(range.min, z_start, z_end);
} */
do_draw_greedy(z_start, z_end);
}
let min_bounds = mesh_delta;
let bounds = Aabb {
@ -668,18 +1015,37 @@ pub fn generate_mesh<'a, V: RectRasterableVol<Vox = Block> + ReadVol + Debug + '
/// [scene::terrain::Terrain::skip_remesh].
#[inline(always)]
fn should_draw_greedy(
pos: Vec3<i32>,
delta: Vec3<i32>,
flat_get: impl Fn(Vec3<i32>) -> Block,
/* pos: Vec3<i32>, */
from: Block,
to: Block,
/* delta: Vec3<i32>,
flat_get: impl Fn(Vec3<i32>) -> Block, */
) -> Option<(bool, FaceKind)> {
let from = flat_get(pos - delta);
let to = flat_get(pos);
/* let from = flat_get(pos - delta);
let to = flat_get(pos); */
// Don't use `is_opaque`, because it actually refers to light transmission
let from_filled = from.is_filled();
/* let from = from.kind() as u8 & 0xF;
let to = to.kind() as u8 & 0xF;
(from ^ to) | ((from.overflowing_sub(1) > to.overflowing_sub(1)) as u8 << 2) */
use common::terrain::BlockKind;
match (from.kind(), to.kind()) {
(BlockKind::Air, BlockKind::Water) => Some((false, FaceKind::Fluid)),
(BlockKind::Water, BlockKind::Air) => Some((true, FaceKind::Fluid)),
(BlockKind::Air, BlockKind::Air) | (BlockKind::Water, BlockKind::Water) => None,
(BlockKind::Air, _) => Some((false, FaceKind::Opaque(false))),
(_, BlockKind::Air) => Some((true, FaceKind::Opaque(false))),
(BlockKind::Water, _) => Some((false, FaceKind::Opaque(true))),
(_, BlockKind::Water) => Some((true, FaceKind::Opaque(true))),
_ => None,
}
/* let from_filled = from.is_filled();
if from_filled == to.is_filled() {
// Check the interface of liquid and non-tangible non-liquid (e.g. air).
let from_liquid = from.is_liquid();
if from_liquid == to.is_liquid() || /*from.is_filled() || to.is_filled()*/from_filled {
if from_filled {
None
} else {
let from_liquid = /*from.is_liquid()*/!from.is_air();
if from_liquid == /*to.is_liquid()*/!to.is_air()/*from.is_filled() || to.is_filled()*//* from_filled */ {
None
} else {
// While liquid is not culled, we still try to keep a consistent orientation as
@ -687,18 +1053,19 @@ fn should_draw_greedy(
// forwards-facing; otherwise, backwards-facing.
Some((from_liquid, FaceKind::Fluid))
}
}
} else {
// If going from unfilled to filled, backward facing; otherwise, forward
// facing. Also, if either from or to is fluid, set the meta accordingly.
Some((
from_filled,
FaceKind::Opaque(if from_filled {
to.is_liquid()
/* to.is_liquid() */!to.is_air()
} else {
from.is_liquid()
/* from.is_liquid() */!from.is_air()
}),
))
}
} */
}
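
For readers skimming the diff, a minimal, self-contained sketch of the new fast path may help: the row_kinds pass above scans adjacent per-row summaries pairwise, skips pairs where both rows are homogeneous and of the same kind (the only whole-row pairs for which should_draw_greedy returns None), and hands an inclusive z-range to do_draw_greedy whenever a span that needed a surface ends. The RowKind enum and segment_draw_ranges below are illustrative assumptions, not code from this commit.

#[derive(Clone, Copy, PartialEq)]
enum RowKind { Air, Liquid, Opaque, Mixed }

/// Sketch of the pairwise scan; returns the inclusive (z_start, z_end) window
/// indices that would be handed to `do_draw_greedy`.
fn segment_draw_ranges(rows: &[RowKind]) -> Vec<(usize, usize)> {
    let mut ranges = Vec::new();
    let mut z_start = 0;
    let mut needs_draw = false;
    for (z, pair) in rows.windows(2).enumerate() {
        let (from, to) = (pair[0], pair[1]);
        // Only identical homogeneous pairs (air/air, liquid/liquid, opaque/opaque)
        // can be skipped; a mixed row or two differing kinds needs a surface.
        let draw = from == RowKind::Mixed || to == RowKind::Mixed || from != to;
        if draw == needs_draw {
            continue; // extend the current draw (or non-draw) span
        }
        if needs_draw {
            // A span that needed drawing just ended; emit it.
            ranges.push((z_start, z - 1));
        }
        z_start = z;
        needs_draw = draw;
    }
    if needs_draw {
        // Emit the trailing span, mirroring the final `do_draw_greedy` call.
        ranges.push((z_start, rows.len().saturating_sub(2)));
    }
    ranges
}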
/// 1D Aabr

View File

@ -336,7 +336,7 @@ where
let manifests = self.manifests.clone();
let slot_ = Arc::clone(&slot);
slow_jobs.spawn("FIGURE_MESHING", move || {
slow_jobs.spawn(&"FIGURE_MESHING", move || {
// First, load all the base vertex data.
let meshes =
<Skel::Body as BodySpec>::bone_meshes(&key, &manifests, extra);

View File

@ -35,7 +35,7 @@ use core::{f32, fmt::Debug, marker::PhantomData, time::Duration};
use crossbeam_channel as channel;
use enum_iterator::IntoEnumIterator;
use guillotiere::AtlasAllocator;
use hashbrown::HashMap;
use hashbrown::{hash_map, HashMap};
use serde::Deserialize;
use std::sync::{
atomic::{AtomicU64, Ordering},
@ -103,11 +103,25 @@ pub struct TerrainChunkData {
frustum_last_plane_index: u8,
}
#[derive(Copy, Clone)]
#[derive(Clone, Copy, Eq, PartialEq)]
enum ChunkWorkerStatus {
/// The worker is not currently active.
Invalid,
/// The worker is currently active and the chunk it is working on is up to date.
Active,
/// The worker was once active for this chunk (it may or may not currently be
/// active), but the chunk is stale and needs remeshing, so we want to process
/// it as soon as possible to prioritize the update.
Stale,
}
#[derive(Clone)]
struct ChunkMeshState {
pos: Vec2<i32>,
started_tick: u64,
is_worker_active: bool,
/// Only ever set from the main thread. Always increasing, and it's okay to read a stale value
/// from the worker threads, so we can use Relaxed loads and stores.
started_tick: Arc<AtomicU64>,
status: ChunkWorkerStatus,
// If this is set, we skip the actual meshing part of the update.
skip_remesh: bool,
}
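
The Arc<AtomicU64> form of started_tick drives a cheap cancellation handshake that recurs throughout this file: the main thread bumps the tick for newer requests (or stores u64::MAX on chunk removal), a queued worker re-reads it with a Relaxed load when it finally runs and skips the work if its captured tick is stale, and late results are dropped on receive by the same comparison. A minimal sketch, assuming placeholder names (spawn_worker, mesh_chunk) that are not part of this commit:

use std::sync::{
    atomic::{AtomicU64, Ordering},
    Arc,
};

/// Worker-side half of the handshake; `mesh_chunk` stands in for `mesh_worker`.
fn spawn_worker(started_tick: Arc<AtomicU64>, spawn_tick: u64) -> impl FnOnce() {
    move || {
        // Re-read when the job actually runs: if the main thread has bumped the
        // tick (newer request) or stored u64::MAX (chunk removed), the captured
        // tick is older than the current one and the work can be skipped.
        if started_tick.load(Ordering::Relaxed) <= spawn_tick {
            mesh_chunk(spawn_tick);
        }
    }
}

fn mesh_chunk(_tick: u64) { /* placeholder for the real meshing work */ }

// Main-thread side of the handshake: bump to reprioritize, poison to cancel.
// started_tick.store(current_tick, Ordering::Relaxed); // newer mesh request
// started_tick.store(u64::MAX, Ordering::Relaxed);     // chunk removed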
@ -226,11 +240,13 @@ impl assets::Asset for SpriteSpec {
const EXTENSION: &'static str = "ron";
}
type V = TerrainChunk;
/// Function executed by worker threads dedicated to chunk meshing.
/// skip_remesh is either None (do the full remesh, including recomputing the
/// light map), or Some((light_map, glow_map)).
fn mesh_worker<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug + 'static>(
fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug + 'static>*/(
pos: Vec2<i32>,
z_bounds: (f32, f32),
skip_remesh: Option<(LightMapFn, LightMapFn)>,
@ -630,7 +646,7 @@ impl SpriteRenderContext {
}
}
impl<V: RectRasterableVol> Terrain<V> {
impl/*<V: RectRasterableVol>*/ Terrain<V> {
pub fn new(
client: &Client,
renderer: &mut Renderer,
@ -754,7 +770,7 @@ impl<V: RectRasterableVol> Terrain<V> {
..Default::default()
},
);
slowjob.spawn("TERRAIN_MESHING", move || {
slowjob.spawn(&"IMAGE_PROCESSING", move || {
// Construct the next atlas on a separate thread. If it doesn't get sent, it means
// the original channel was dropped, which implies the terrain scene data no longer
// exists, so we can just drop the result in that case.
@ -781,20 +797,20 @@ impl<V: RectRasterableVol> Terrain<V> {
Ok(col_light)
}
fn remove_chunk_meta(&mut self, _pos: Vec2<i32>, chunk: &TerrainChunkData) {
fn remove_chunk_meta(atlas: &mut AtlasAllocator, _pos: Vec2<i32>, chunk: &TerrainChunkData) {
// No need to free the allocation if the chunk is not allocated in the current
// atlas, since we don't bother tracking it at that point.
if let Some(col_lights) = chunk.col_lights_alloc {
self.atlas.deallocate(col_lights);
atlas.deallocate(col_lights);
}
/* let (zmin, zmax) = chunk.z_bounds;
self.z_index_up.remove(Vec3::from(zmin, pos.x, pos.y));
self.z_index_down.remove(Vec3::from(zmax, pos.x, pos.y)); */
}
fn insert_chunk(&mut self, pos: Vec2<i32>, chunk: TerrainChunkData) {
if let Some(old) = self.chunks.insert(pos, chunk) {
self.remove_chunk_meta(pos, &old);
fn insert_chunk(chunks: &mut HashMap<Vec2<i32>, TerrainChunkData>, atlas: &mut AtlasAllocator, pos: Vec2<i32>, chunk: TerrainChunkData) {
if let Some(old) = chunks.insert(pos, chunk) {
Self::remove_chunk_meta(atlas, pos, &old);
}
/* let (zmin, zmax) = chunk.z_bounds;
self.z_index_up.insert(Vec3::from(zmin, pos.x, pos.y));
@ -803,13 +819,15 @@ impl<V: RectRasterableVol> Terrain<V> {
fn remove_chunk(&mut self, pos: Vec2<i32>) {
if let Some(chunk) = self.chunks.remove(&pos) {
self.remove_chunk_meta(pos, &chunk);
Self::remove_chunk_meta(&mut self.atlas, pos, &chunk);
// Temporarily remember dead chunks for shadowing purposes.
self.shadow_chunks.push((pos, chunk));
}
if let Some(_todo) = self.mesh_todo.remove(&pos) {
//Do nothing on todo mesh removal.
if let Some(todo) = self.mesh_todo.remove(&pos) {
// Update the old starting tick to u64::MAX so any chunk workers that haven't started
// yet can be canceled.
todo.started_tick.store(u64::MAX, Ordering::Relaxed);
}
}
@ -945,7 +963,11 @@ impl<V: RectRasterableVol> Terrain<V> {
for i in -1..2 {
for j in -1..2 {
if i != 0 || j != 0 {
self.mesh_todo.remove(&(pos + Vec2::new(i, j)));
if let Some(todo) = self.mesh_todo.remove(&(pos + Vec2::new(i, j))) {
// Update the old starting tick to u64::MAX so any chunk workers that
// haven't started yet can be canceled.
todo.started_tick.store(u64::MAX, Ordering::Relaxed);
}
}
}
}
@ -980,9 +1002,10 @@ impl<V: RectRasterableVol> Terrain<V> {
for j in -1..2 {
let pos = pos + Vec2::new(i, j);
if !(self.chunks.contains_key(&pos) || self.mesh_todo.contains_key(&pos))
|| modified
{
let entry = self.mesh_todo.entry(pos);
let done_meshing = self.chunks.contains_key(&pos);
let in_progress = done_meshing || matches!(entry, hash_map::Entry::Occupied(_));
if modified || !in_progress {
let mut neighbours = true;
for i in -1..2 {
for j in -1..2 {
@ -992,12 +1015,22 @@ impl<V: RectRasterableVol> Terrain<V> {
}
if neighbours {
self.mesh_todo.insert(pos, ChunkMeshState {
let todo = entry.or_insert_with(|| ChunkMeshState {
pos,
started_tick: current_tick,
is_worker_active: false,
started_tick: Arc::new(AtomicU64::new(current_tick)),
status: ChunkWorkerStatus::Invalid,
skip_remesh: false,
});
todo.skip_remesh = false;
todo.started_tick.store(current_tick, Ordering::Relaxed);
todo.status = if done_meshing/* || todo.status != ChunkWorkerStatus::Invalid*/ {
// Make the new status stale, to make sure the chunk gets updated
// promptly.
ChunkWorkerStatus::Stale
} else {
ChunkWorkerStatus::Invalid
};
}
}
}
@ -1077,13 +1110,14 @@ impl<V: RectRasterableVol> Terrain<V> {
}
}
if neighbours {
let done_meshing = self.chunks.contains_key(&neighbour_chunk_pos);
let todo =
self.mesh_todo
.entry(neighbour_chunk_pos)
.or_insert(ChunkMeshState {
.or_insert_with(|| ChunkMeshState {
pos: neighbour_chunk_pos,
started_tick: current_tick,
is_worker_active: false,
started_tick: Arc::new(AtomicU64::new(current_tick)),
status: ChunkWorkerStatus::Invalid,
skip_remesh,
});
@ -1094,8 +1128,15 @@ impl<V: RectRasterableVol> Terrain<V> {
// since otherwise the active remesh is computing new lighting values
// that we don't have yet.
todo.skip_remesh &= skip_remesh;
todo.is_worker_active = false;
todo.started_tick = current_tick;
todo.started_tick.store(current_tick, Ordering::Relaxed);
todo.status = if done_meshing/* || todo.status != ChunkWorkerStatus::Invalid*/ {
// This chunk is now stale, and was already meshed before, so we
// want to update it as soon as possible to make its update take
// effect.
ChunkWorkerStatus::Stale
} else {
ChunkWorkerStatus::Invalid
};
}
}
}
@ -1116,20 +1157,20 @@ impl<V: RectRasterableVol> Terrain<V> {
let mut todo = self
.mesh_todo
.values_mut()
.filter(|todo| !todo.is_worker_active)
.filter(|todo| todo.status != ChunkWorkerStatus::Active)
// TODO: BinaryHeap
.collect::<Vec<_>>();
todo.sort_unstable_by_key(|todo| {
(
(todo.pos.as_::<i64>() * TerrainChunk::RECT_SIZE.as_::<i64>())
.distance_squared(mesh_focus_pos),
todo.started_tick,
todo.started_tick.load(Ordering::Relaxed),
)
});
let slowjob = scene_data.state.slow_job_pool();
for (todo, chunk) in todo.into_iter()
.filter(|todo| !todo.is_worker_active)
/* .filter(|todo| todo.status != ChunkWorkerStatus::Active) */
/* .min_by_key(|todo| ((todo.pos.as_::<i64>() * TerrainChunk::RECT_SIZE.as_::<i64>()).distance_squared(mesh_focus_pos), todo.started_tick)) */
// Find a reference to the actual `TerrainChunk` we're meshing
./*and_then*/filter_map(|todo| {
@ -1144,9 +1185,9 @@ impl<V: RectRasterableVol> Terrain<V> {
})?))
})
{
if self.mesh_todos_active.load(Ordering::Relaxed) > meshing_cores * 8 {
/* if self.mesh_todos_active.load(Ordering::Relaxed) > /* meshing_cores * 16 */CHUNKS_PER_SECOND as u64 / 60 {
break;
}
} */
// like ambient occlusion and edge elision, we also need the borders
// of the chunk's neighbours too (hence the `- 1` and `+ 1`).
@ -1174,13 +1215,16 @@ impl<V: RectRasterableVol> Terrain<V> {
};
// The region to actually mesh
let mesh_filter = |pos: &Vec2<i32>|
pos.x == todo.pos.x && pos.y <= todo.pos.y ||
pos.y == todo.pos.y && pos.x <= todo.pos.x;
let min_z = volume
.iter()
.filter(|(pos, _)| pos.x <= todo.pos.x && pos.y <= todo.pos.y)
.filter(|(pos, _)| mesh_filter(pos))
.fold(i32::MAX, |min, (_, chunk)| chunk.get_min_z().min(min));
let max_z = volume
.iter()
.filter(|(pos, _)| pos.x <= todo.pos.x && pos.y <= todo.pos.y)
.filter(|(pos, _)| mesh_filter(pos))
.fold(i32::MIN, |max, (_, chunk)| chunk.get_max_z().max(max));
let aabb = Aabb {
@ -1200,7 +1244,8 @@ impl<V: RectRasterableVol> Terrain<V> {
.map(|chunk| (Arc::clone(&chunk.light_map), Arc::clone(&chunk.glow_map)));
// Queue the worker thread.
let started_tick = todo.started_tick;
let started_tick_ = Arc::clone(&todo.started_tick);
let started_tick = started_tick_.load(Ordering::Relaxed);
let sprite_data = Arc::clone(&self.sprite_data);
let sprite_config = Arc::clone(&self.sprite_config);
let cnt = Arc::clone(&self.mesh_todos_active);
@ -1209,8 +1254,14 @@ impl<V: RectRasterableVol> Terrain<V> {
let create_instances = renderer.create_instances_lazy();
let create_locals = renderer.create_terrain_bound_locals();
cnt.fetch_add(1, Ordering::Relaxed);
slowjob
.spawn("TERRAIN_MESHING", move || {
let job = move || {
// Since this loads when the task actually *runs*, rather than when it's
// queued, it provides us with a good opportunity to check whether the chunk
// should be canceled. We might miss updates, but that's okay, since canceling
// is just an optimization.
let started_tick_ = started_tick_.load(Ordering::Relaxed);
if started_tick_ <= started_tick {
// This meshing job was not canceled.
let sprite_data = sprite_data;
let _ = send.send(mesh_worker(
pos,
@ -1228,9 +1279,17 @@ impl<V: RectRasterableVol> Terrain<V> {
create_instances,
create_locals,
));
}
cnt.fetch_sub(1, Ordering::Relaxed);
});
todo.is_worker_active = true;
};
if todo.status == ChunkWorkerStatus::Stale {
// The chunk was updated unexpectedly, so insert at the front, not the back, to see
// the update as soon as possible.
slowjob.spawn_front(&"TERRAIN_MESHING", job);
} else {
slowjob.spawn(&"TERRAIN_MESHING", job);
}
todo.status = ChunkWorkerStatus::Active;
}
drop(terrain);
drop(guard);
@ -1243,16 +1302,21 @@ impl<V: RectRasterableVol> Terrain<V> {
let recv_count =
scene_data.state.get_delta_time() * CHUNKS_PER_SECOND + self.mesh_recv_overflow;
self.mesh_recv_overflow = recv_count.fract();
let mesh_recv = &self.mesh_recv;
let incoming_chunks =
std::iter::from_fn(|| self.mesh_recv.recv_timeout(Duration::new(0, 0)).ok())
.take(recv_count.floor() as usize)
.collect::<Vec<_>>(); // Avoid ownership issue
std::iter::from_fn(|| mesh_recv.try_recv().ok())
.take(recv_count.floor() as usize);
for response in incoming_chunks {
match self.mesh_todo.get(&response.pos) {
// It's the mesh we want, insert the newly finished model into the terrain model
// data structure (convert the mesh to a model first of course).
Some(todo) if response.started_tick <= todo.started_tick => {
let started_tick = todo.started_tick;
Some(todo) => {
let started_tick = todo.started_tick.load(Ordering::Relaxed);
if response.started_tick > started_tick {
// Chunk must have been removed, or it was spawned on an old tick. Drop
// the mesh since it's either out of date or no longer needed.
continue;
}
let sprite_instances = response.sprite_instances;
@ -1331,7 +1395,7 @@ impl<V: RectRasterableVol> Terrain<V> {
load_time,
)]);
self.insert_chunk(response.pos, TerrainChunkData {
Self::insert_chunk(&mut self.chunks, &mut self.atlas, response.pos, TerrainChunkData {
load_time,
opaque_model: mesh.opaque_model,
fluid_model: mesh.fluid_model,
@ -1360,12 +1424,11 @@ impl<V: RectRasterableVol> Terrain<V> {
}
if response.started_tick == started_tick {
// This was the latest worker for this chunk, so we don't need to worry
// about canceling any later tasks.
self.mesh_todo.remove(&response.pos);
}
},
// Chunk must have been removed, or it was spawned on an old tick. Drop the mesh
// since it's either out of date or no longer needed.
Some(_todo) => {},
None => {},
}
}
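
The recv_count computation above is a fractional budget accumulator: each frame can only consume a whole number of finished chunks, so the unspent fractional part is carried in mesh_recv_overflow, letting the long-run intake converge to CHUNKS_PER_SECOND. A standalone sketch of the same idea (the ChunkBudget name is an assumption, not code from this commit):

struct ChunkBudget {
    overflow: f32,
}

impl ChunkBudget {
    /// Returns how many whole chunks may be processed this frame for the given
    /// frame time `dt`, carrying the fractional remainder into the next call.
    fn take(&mut self, dt: f32, chunks_per_second: f32) -> usize {
        let budget = dt * chunks_per_second + self.overflow;
        self.overflow = budget.fract();
        budget.floor() as usize
    }
}

At 60 FPS, for example, a rate of 90 chunks per second would alternate between 1 and 2 chunks per frame rather than truncating to 1 every frame.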

View File

@ -10,7 +10,7 @@ use crate::{
use common::{figure::Segment, slowjob::SlowJobPool};
use guillotiere::{size2, SimpleAtlasAllocator};
use hashbrown::{hash_map::Entry, HashMap};
use image::{DynamicImage, RgbaImage};
use image::{DynamicImage, GenericImageView, RgbaImage};
use pixel_art::resize_pixel_art;
use slab::Slab;
use std::{hash::Hash, sync::Arc};
@ -411,6 +411,20 @@ fn draw_graphic(
// Short-circuit spawning a job on the threadpool for blank graphics
Some(Graphic::Blank) => None,
Some(inner) => {
// HACK: Only use the pool for "large" graphics (source images larger than
// 256x256 pixels; the 32x32 minimap images in particular stay off the pool).
//
// FIXME: Proper flickering solution.
let pool = if let Graphic::Image(image, _) = inner {
let (w, h) = image.dimensions();
if w.saturating_mul(h) > 256 * 256 {
pool
} else {
None
}
} else {
pool
};
keyed_jobs
.spawn(pool, (graphic_id, dims), || {
let inner = inner.clone();

View File

@ -63,7 +63,7 @@ impl<K: Hash + Eq + Send + Sync + 'static + Clone, V: Send + Sync + 'static> Key
// which means that it completed while we tried to GC its pending
// struct, which means that we'll GC it in the next cycle, so ignore
// the error in this collection.
let _ = pool.cancel(job);
let _ = job.cancel();
}
}
fresh
@ -89,7 +89,7 @@ impl<K: Hash + Eq + Send + Sync + 'static + Clone, V: Send + Sync + 'static> Key
// approximating that
let tx = self.tx.clone();
let f = f();
let job = pool.spawn(self.name, move || {
let job = pool.spawn(&self.name, move || {
let v = f(&k);
let _ = tx.send((k, v));
});
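
The two hunks above follow the reworked SlowJobPool interface introduced by this commit: spawn now takes the job name by reference and hands back a SlowJob handle, and cancellation goes through that handle rather than through the pool. A hedged usage sketch (the job name and the work closure are placeholders, and "EXAMPLE_TASK" is assumed to be configured on the pool):

use common::slowjob::SlowJobPool;

fn spawn_and_maybe_cancel(pool: &SlowJobPool) {
    let job = pool.spawn(&"EXAMPLE_TASK", move || {
        // placeholder for expensive work
    });
    // Cancel through the handle; as in KeyedJobs above, the error is ignored
    // because the job may already have started or finished.
    let _ = job.cancel();
}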

View File

@ -600,7 +600,7 @@ fn write_column<R: Rng>(
for z in bedrock..z_range.end {
let wpos = wpos2d.with_z(z);
let mut try_spawn_entity = false;
canvas.map(wpos, |_block| {
canvas.set(wpos, {
if z < z_range.start - 4 && !void_below {
Block::new(BlockKind::Lava, Rgb::new(255, 65, 0))
} else if basalt > 0.0
@ -844,7 +844,7 @@ fn write_column<R: Rng>(
{
Block::air(sprite)
} else {
get_mushroom(wpos, rng).unwrap_or(Block::air(SpriteKind::Empty))
/*get_mushroom(wpos, rng).unwrap_or(*/Block::air(SpriteKind::Empty)/*)*/
}
});