diff --git a/Cargo.lock b/Cargo.lock index d74cea91bc..9af16dfb50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -155,6 +155,11 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arbalest" +version = "0.2.1" +source = "git+https://gitlab.com/veloren/arbalest.git?rev=9cb8f67a4f6d8f3cc908dac4eb5eb8aec9fab07b#9cb8f67a4f6d8f3cc908dac4eb5eb8aec9fab07b" + [[package]] name = "arr_macro" version = "0.1.3" @@ -6510,6 +6515,7 @@ name = "veloren-common" version = "0.10.0" dependencies = [ "approx 0.4.0", + "arbalest", "bitflags", "bitvec", "bumpalo", @@ -6824,6 +6830,7 @@ dependencies = [ "assets_manager", "backtrace", "bincode", + "bumpalo", "bytemuck", "chrono", "chumsky", diff --git a/assets/common/abilities/debug/forwardboost.ron b/assets/common/abilities/debug/forwardboost.ron index dc198e3d62..0182763358 100644 --- a/assets/common/abilities/debug/forwardboost.ron +++ b/assets/common/abilities/debug/forwardboost.ron @@ -1,6 +1,6 @@ Boost( movement_duration: 0.05, only_up: false, - speed: 400.0, + speed: 3400.0, max_exit_velocity: 100.0, -) \ No newline at end of file +) diff --git a/assets/world/features.ron b/assets/world/features.ron index c7d3319968..27aef3b84a 100644 --- a/assets/world/features.ron +++ b/assets/world/features.ron @@ -3,7 +3,7 @@ ( caverns: false, // TODO: Disabled by default until cave overhaul - caves: true, + caves: false, rocks: true, shrubs: true, trees: true, diff --git a/client/src/lib.rs b/client/src/lib.rs index 457411544c..4115e4a29c 100644 --- a/client/src/lib.rs +++ b/client/src/lib.rs @@ -280,7 +280,8 @@ pub struct CharacterList { pub loading: bool, } -const TOTAL_PENDING_CHUNKS_LIMIT: usize = 1024; +/// Higher than what's needed at VD = 65. +const TOTAL_PENDING_CHUNKS_LIMIT: usize = /*1024*/13800; impl Client { pub async fn new( @@ -1874,8 +1875,8 @@ impl Client { if !skip_mode && !self.pending_chunks.contains_key(key) { const CURRENT_TICK_PENDING_CHUNKS_LIMIT: usize = 8 * 4; if self.pending_chunks.len() < TOTAL_PENDING_CHUNKS_LIMIT - && current_tick_send_chunk_requests - < CURRENT_TICK_PENDING_CHUNKS_LIMIT + && /* current_tick_send_chunk_requests + < CURRENT_TICK_PENDING_CHUNKS_LIMIT */true { self.send_msg_err(ClientGeneral::TerrainChunkRequest { key: *key, diff --git a/common/Cargo.toml b/common/Cargo.toml index cd5d4f65b6..31745e5fc0 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -79,6 +79,7 @@ petgraph = { version = "0.6", optional = true } kiddo = { version = "0.1", optional = true } # Data structures +arbalest = { git = "https://gitlab.com/veloren/arbalest.git", rev = "9cb8f67a4f6d8f3cc908dac4eb5eb8aec9fab07b", features = ["nightly"] } hashbrown = { version = "0.12", features = ["rayon", "serde", "nightly"] } slotmap = { version = "1.0", features = ["serde"] } indexmap = { version = "1.3.0", features = ["rayon"] } diff --git a/common/src/lib.rs b/common/src/lib.rs index 857fa2a1ce..1c0223d864 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -8,6 +8,8 @@ array_chunks, associated_type_defaults, bool_to_option, + coerce_unsized, + dispatch_from_dyn, fundamental, generic_const_exprs, generic_arg_infer, @@ -17,6 +19,7 @@ slice_as_chunks, trait_alias, type_alias_impl_trait, + unsize, extend_one, arbitrary_enum_discriminant, generic_associated_types, diff --git a/common/src/slowjob.rs b/common/src/slowjob.rs index 414d4888f8..5b3051e1a8 100644 --- a/common/src/slowjob.rs +++ b/common/src/slowjob.rs @@ -1,4 +1,11 @@ -use hashbrown::HashMap; +use arbalest::sync::{Strong, Frail}; +use core::{ + fmt, + marker::Unsize, + ops::{CoerceUnsized, 
DispatchFromDyn}, + sync::atomic::{AtomicBool, Ordering} +}; +use hashbrown::{hash_map::Entry, HashMap}; use rayon::ThreadPool; use std::{ collections::VecDeque, @@ -47,25 +54,27 @@ use tracing::{error, warn}; #[derive(Clone)] pub struct SlowJobPool { internal: Arc>, + threadpool: Arc, } +type Name = /*String*/&'static str; + #[derive(Debug)] pub struct SlowJob { - name: String, - id: u64, + task: Frail, } +// impl + CoerceUnsized, U: ?Sized> CoerceUnsized> for Task {} + struct InternalSlowJobPool { - next_id: u64, - queue: HashMap>, - configs: HashMap, - last_spawned_configs: Vec, + cur_slot: usize, + queue: HashMap>>, + configs: HashMap, + last_spawned_configs: Vec, global_spawned_and_running: u64, global_limit: u64, jobs_metrics_cnt: usize, - jobs_metrics: HashMap>, - threadpool: Arc, - internal: Option>>, + jobs_metrics: HashMap>, } #[derive(Debug)] @@ -74,46 +83,42 @@ struct Config { local_spawned_and_running: u64, } -struct Queue { - id: u64, - name: String, - task: Box, +#[derive(Debug)] +struct Task { + queue_created: Instant, + /// Has this task been canceled? + is_canceled: AtomicBool, + /// The actual task function. Technically, the Option is unnecessary, since we'll only ever + /// run it once, but the performance improvement doesn't justify unsafe in this case. + task: F, } +/// NOTE: Should be FnOnce, but can't because there's no easy way to run an FnOnce function on an +/// Arc even if [try_unwrap] would work. We could write unsafe code to do this, but it probably +/// isn't worth it. +type Queue = Task; + pub struct JobMetrics { pub queue_created: Instant, pub execution_start: Instant, pub execution_end: Instant, } -impl Queue { - fn new(name: &str, id: u64, internal: &Arc>, f: F) -> Self - where - F: FnOnce() + Send + Sync + 'static, +impl Task { + fn new(f: F) -> Task + where F: FnOnce() + Send + Sync + 'static { - let internal = Arc::clone(internal); - let name_cloned = name.to_owned(); let queue_created = Instant::now(); - Self { - id, - name: name.to_owned(), - task: Box::new(move || { - common_base::prof_span_alloc!(_guard, &name_cloned); - let execution_start = Instant::now(); - f(); - let execution_end = Instant::now(); - let metrics = JobMetrics { - queue_created, - execution_start, - execution_end, - }; - // directly maintain the next task afterwards - { - let mut lock = internal.lock().expect("slowjob lock poisoned"); - lock.finish(&name_cloned, metrics); - lock.spawn_queued(); + let mut f = Some(f); + Task { + queue_created, + is_canceled: AtomicBool::new(false), + task: move || { + // Working around not being able to call FnOnce in an Arc. 
+ if let Some(f) = f.take() { + f(); } - }), + }, } } } @@ -122,41 +127,33 @@ impl InternalSlowJobPool { pub fn new( global_limit: u64, jobs_metrics_cnt: usize, - threadpool: Arc, - ) -> Arc> { - let link = Arc::new(Mutex::new(Self { - next_id: 0, + ) -> Self { + Self { queue: HashMap::new(), configs: HashMap::new(), + cur_slot: 0, last_spawned_configs: Vec::new(), global_spawned_and_running: 0, global_limit: global_limit.max(1), jobs_metrics_cnt, jobs_metrics: HashMap::new(), - threadpool, - internal: None, - })); - - let link_clone = Arc::clone(&link); - link.lock() - .expect("poisoned on InternalSlowJobPool::new") - .internal = Some(link_clone); - link + } } /// returns order of configuration which are queued next fn calc_queued_order( &self, - mut queued: HashMap<&String, u64>, + mut queued: HashMap<&Name, u64>, mut limit: usize, - ) -> Vec { + ) -> Vec { let mut roundrobin = self.last_spawned_configs.clone(); let mut result = vec![]; - let spawned = self + /* let spawned = self .configs .iter() .map(|(n, c)| (n, c.local_spawned_and_running)) - .collect::>(); + .collect::>(); */ + let spawned = &self.configs; let mut queried_capped = self .configs .iter() @@ -174,7 +171,7 @@ impl InternalSlowJobPool { // grab all configs that are queued and not running. in roundrobin order for n in roundrobin.clone().into_iter() { if let Some(c) = queued.get_mut(&n) { - if *c > 0 && spawned.get(&n).cloned().unwrap_or(0) == 0 { + if *c > 0 && spawned.get(&n).map(|c| c.local_spawned_and_running).unwrap_or(0) == 0 { result.push(n.clone()); *c -= 1; limit -= 1; @@ -212,7 +209,7 @@ impl InternalSlowJobPool { match spawn_rates.first_mut() { Some((n, r)) => { if *r > f32::EPSILON { - result.push(n.clone()); + result.push(n.to_owned()); limit -= 1; *r -= 1.0; } else { @@ -225,7 +222,7 @@ impl InternalSlowJobPool { result } - fn can_spawn(&self, name: &str) -> bool { + fn can_spawn(&self, name: &Name) -> bool { let queued = self .queue .iter() @@ -247,31 +244,33 @@ impl InternalSlowJobPool { to_be_queued_cnt > queued_cnt } - pub fn spawn(&mut self, name: &str, f: F) -> SlowJob + fn spawn(&mut self, slowjob: &SlowJobPool, push_back: bool, name: &Name, f: F) -> SlowJob where F: FnOnce() + Send + Sync + 'static, { - let id = self.next_id; - self.next_id += 1; - let queue = Queue::new(name, id, self.internal.as_ref().expect("internal empty"), f); - self.queue - .entry(name.to_string()) - .or_default() - .push_back(queue); + let queue: Strong = Strong::new(Task::new(f)); + let mut deque = self.queue + .entry(name.to_owned()) + .or_default(); + let job = SlowJob { + task: Strong::downgrade(&queue) + }; + if push_back { + deque.push_back(queue); + } else { + deque.push_front(queue); + } debug_assert!( self.configs.contains_key(name), "Can't spawn unconfigured task!" 
); //spawn already queued - self.spawn_queued(); - SlowJob { - name: name.to_string(), - id, - } + self.spawn_queued(slowjob); + job } - fn finish(&mut self, name: &str, metrics: JobMetrics) { - let metric = self.jobs_metrics.entry(name.to_string()).or_default(); + fn finish(&mut self, name: &Name, metrics: JobMetrics) { + let metric = self.jobs_metrics.entry(name.to_owned()).or_default(); if metric.len() < self.jobs_metrics_cnt { metric.push(metrics); @@ -284,8 +283,87 @@ impl InternalSlowJobPool { } } - fn spawn_queued(&mut self) { - let queued = self + /// NOTE: This does not spawn the job directly, but it *does* increment cur_slot and the local + /// and global task counters, so make sure to actually finish the returned jobs if you consume + /// the iterator, or the position in the queue may be off! + #[must_use = "Remember to actually use the returned jobs if you consume the iterator."] + fn next_jobs<'a>(&'a mut self) -> impl Iterator)> + 'a { + let queued = &mut self.queue; + let configs = &mut self.configs; + let global_spawned_and_running = &mut self.global_spawned_and_running; + + let cur_slot = &mut self.cur_slot; + let num_slots = self.last_spawned_configs.len(); + let jobs_limit = self.global_limit.saturating_sub(*global_spawned_and_running) as usize; + + let queued_order = self.last_spawned_configs.iter().cycle().skip(*cur_slot).take(num_slots); + queued_order + // NOTE: num_slots > 0, because queued_order can only yield up to num_slots elements. + .inspect(move |_| { *cur_slot = (*cur_slot + 1) % num_slots; }) + .filter_map(move |name| { + let deque = queued.get_mut(name)?; + let config = configs.get_mut(name)?; + if /* config.local_spawned_and_running < config.local_limit*/true { + let task = deque.pop_front()?; + config.local_spawned_and_running += 1; + *global_spawned_and_running += 1; + Some((name.to_owned(), task)) + } else { + None + } + }) + .take(jobs_limit) + } + + /// Spawn tasks in the threadpool, in round-robin order. + /// + /// NOTE: Do *not* call this from an existing thread in the threadpool. 
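// Illustrative sketch, not part of this patch: the round-robin scheme used by `next_jobs`
// above, reduced to a self-contained helper (`next_ready` and its arguments are
// hypothetical names). The core idea: walk the slot list starting at `cur_slot`, advance
// the cursor for every slot inspected (modulo the slot count), and return the first queue
// name that is ready, so repeated calls rotate fairly between the configured queues.
fn next_ready(
    slots: &[&'static str],
    cur_slot: &mut usize,
    mut ready: impl FnMut(&'static str) -> bool,
) -> Option<&'static str> {
    let num_slots = slots.len();
    if num_slots == 0 {
        return None;
    }
    for _ in 0..num_slots {
        let name = slots[*cur_slot];
        *cur_slot = (*cur_slot + 1) % num_slots;
        if ready(name) {
            return Some(name);
        }
    }
    None
}
// For example, with slots = ["CHUNK_GENERATOR", "CHUNK_SERIALIZER"] and both queues
// non-empty, alternating calls return the two names in turn instead of draining one
// queue before the other.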
+ fn spawn_queued(&mut self, slowjob: &SlowJobPool) { + /* let old_running = self.global_spawned_and_running; */ + while self.next_jobs().map(|task| slowjob.spawn_in_threadpool(task)).count() != 0 {} + /* let total_spawned = (self.global_spawned_and_running - old_running) as usize; + self.cur_slot = (initial_slot + total_spawned) % num_slots; + self.cur_slot %= num_slots; */ + /* let queued = self + .queue + .iter_mut(); + /* .iter(); + .map(|(n, m)| (n, m.len() as u64)) + .collect::>(); + let limit = self.global_limit as usize; + let queued_order = self.calc_queued_order(queued, limit); */ + + let queued_order = queued; + for (name, deque) in queued_order/*.into_iter()*/.take(self.global_limit.saturating_sub(self.global_spawned_and_running) as usize) { + /* match self.queue.get_mut(&name) { + Some(deque) => */match deque.pop_front() { + Some(queue) => { + //fire + self.global_spawned_and_running += 1; + self.configs + .get_mut(&queue.name) + .expect("cannot fire a unconfigured job") + .local_spawned_and_running += 1; + self.last_spawned_configs + .iter() + .position(|e| e == &queue.name) + .map(|i| self.last_spawned_configs.remove(i)); + self.last_spawned_configs.push((&queue.name).to_owned()); + self.threadpool.spawn(queue.task); + }, + None => /* error!( + "internal calculation is wrong, we extected a schedulable job to be \ + present in the queue" + ),*/{} + }/*, + None => error!( + "internal calculation is wrong, we marked a queue as schedulable which \ + doesn't exist" + ), + } */ + } */ + + /* let queued = self .queue .iter() .map(|(n, m)| (n, m.len() as u64)) @@ -306,7 +384,7 @@ impl InternalSlowJobPool { .iter() .position(|e| e == &queue.name) .map(|i| self.last_spawned_configs.remove(i)); - self.last_spawned_configs.push(queue.name.to_owned()); + self.last_spawned_configs.push((&queue.name).to_owned()); self.threadpool.spawn(queue.task); }, None => error!( @@ -319,77 +397,186 @@ impl InternalSlowJobPool { doesn't exist" ), } - } + } */ } - pub fn take_metrics(&mut self) -> HashMap> { + pub fn take_metrics(&mut self) -> HashMap> { core::mem::take(&mut self.jobs_metrics) } } + +impl SlowJob { + /// TODO: This would be simplified (and perform a bit better) if there existed a structure that + /// "split" an Arc allocation into two parts, a shared and owned part (with types corresponding + /// to references to each). The strong type would not be cloneable and would grant mutable + /// access to the owned part, and shared access to the shared part; the weak type would be + /// cloneable, but would only shared access to the shared part, and no access to the owned + /// part. This would allow us to share the allocation, without needing to keep track of an + /// explicit weak pointer count, perform any sort of locking on cancelation, etc. + /// Unfortunately I cannot find such a type on crates.io, and writing one would be a fairly + /// involved task, so we defer this for now. + pub fn cancel(self) -> Result<(), Self> { + // Correctness of cancellation is a bit subtle, due to wanting to avoid locking the queue + // more than necessary, iterate over jobs, or introduce a way to access jobs by key. + // + // First, we try to upgrade our weak reference to the Arc. This will fail if the strong + // reference is currently mutably borrowed, or if the strong side has already been + // dropped. 
Since we never mutably borrow the reference until we're definitely going to + // run the task, and we only drop the strong side after the task is complete, this is + // a conservative signal that there's no point in cancelling the task, so this has no + // false positives. + let task = self.task.try_upgrade().or(Err(self))?; + // Now that the task is upgraded, any attempt by the strong side to mutably access the + // task will fail, so it will assume it's been canceled. This is fine, because we're + // about to cancel it anyway. + // + // Next, we try to signal (monotonically) that the task should be cancelled, by updating + // the value of canceled atomically to true. Since this is monotonic, we can use Relaxed + // here. It would actually be fine if this signal was lost, since cancellation is always + // an optimization, but with the current implementation it won't be--the strong side only + // checks for cancellation after it tries to mutably access the task, which can't happen + // while the task is "locked" by our weak upgrade, so our write here will always be + // visible. + task.is_canceled.store(true, Ordering::Relaxed); + // Note that we don't bother to check whether the task was already canceled. Firstly, + // because we don't care, secondly because even if we did, this function takes ownership of + // the SlowJob, which contains the only weak reference with the ability to cancel, so no + // job can be canceled more than once anyway. + Ok(()) + } +} + impl SlowJobPool { pub fn new(global_limit: u64, jobs_metrics_cnt: usize, threadpool: Arc) -> Self { Self { - internal: InternalSlowJobPool::new(global_limit, jobs_metrics_cnt, threadpool), + internal: Arc::new(Mutex::new(InternalSlowJobPool::new(global_limit, jobs_metrics_cnt))), + threadpool, } } /// configure a NAME to spawn up to f(n) threads, depending on how many /// threads we globally have available - pub fn configure(&self, name: &str, f: F) + pub fn configure(&self, name: &Name, f: F) where F: Fn(u64) -> u64, { let mut lock = self.internal.lock().expect("lock poisoned while configure"); - let cnf = Config { - local_limit: f(lock.global_limit).max(1), - local_spawned_and_running: 0, - }; - lock.configs.insert(name.to_owned(), cnf); - lock.last_spawned_configs.push(name.to_owned()); + let lock = &mut *lock; + // Make sure not to update already-present config entries, since this can mess up some of + // the stuff we do that assumes monotonicity. + if let Entry::Vacant(v) = lock.configs.entry(name.to_owned()) { + let cnf = Config { + local_limit: f(lock.global_limit).max(1), + local_spawned_and_running: 0, + }; + let cnf = v.insert(cnf); + // Add cnf into the entry list as many times as its local limit, to ensure that stuff + // gets run more often if it has more assigned threads. + lock.last_spawned_configs.resize(lock.last_spawned_configs.len() + /* cnf.local_limit as usize */1, name.to_owned()); + } + } + + /// Spawn a task in the threadpool. + /// + /// This runs the task, and then checks at the end to see if there are any more tasks to run + /// before returning for good. In cases with lots of tasks, this may help avoid unnecessary + /// context switches or extra threads being spawned unintentionally. + fn spawn_in_threadpool(&self, mut name_task: (Name, Strong)) { + let internal = Arc::clone(&self.internal); + + // NOTE: It's important not to use internal until we're in the spawned thread, since the + // lock is probably currently taken! 
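// Illustrative end-to-end use of the cancellation API described above; the function below
// is an example, not part of this patch. `configure` must be called for a name before
// spawning on it, `spawn` hands back a `SlowJob` handle, and `cancel` consumes that
// handle, returning it in `Err` when the task has already started running or finished
// (i.e. when the weak-to-strong upgrade fails).
fn cancel_example(pool: &SlowJobPool) {
    pool.configure(&"EXAMPLE", |n| n / 2);
    let job = pool.spawn(&"EXAMPLE", || {
        // expensive background work goes here
    });
    match job.cancel() {
        Ok(()) => { /* the queued task will never run */ },
        Err(_job) => { /* too late: the task is running or already complete */ },
    }
}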
+ self.threadpool.spawn(move || { + // Repeatedly run until exit; we do things this way to avoid recursion, which might blow + // our call stack. + loop { + let (name, mut task) = name_task; + let queue_created = task.queue_created; + // See the [SlowJob::cancel] method for justification for this step's correctness. + // + // NOTE: This is not exact because we do it before borrowing the task, but the + // difference is minor and it makes it easier to assign metrics to canceled tasks + // (though maybe we don't want to do that?). + let execution_start = Instant::now(); + if let Some(mut task) = Strong::try_borrow_mut(&mut task) + .ok() + .filter(|task| !task.is_canceled.load(Ordering::Relaxed)) { + // The task was not canceled. + // + // Run the task in its own scope so perf works correctly. + common_base::prof_span_alloc!(_guard, &name); + (task.task)(); + } + let execution_end = Instant::now(); + let metrics = JobMetrics { + queue_created, + execution_start, + execution_end, + }; + // directly maintain the next task afterwards + let next_task = { + // We take the lock in this scope to make sure it's dropped before we + // actully launch the next job. + let mut lock = internal.lock().expect("slowjob lock poisoned"); + let lock = &mut *lock; + lock.finish(&name, metrics); + let mut jobs = lock.next_jobs(); + jobs.next() + }; + name_task = if let Some(name_task) = next_task { + // We launch the job on the *same* thread, since we're already in the + // thread pool. + name_task + } else { + // There are no more tasks to run at this time, so we exit the thread in + // the threadpool (in theory, it might make sense to yield or spin a few + // times or something in case we have more tasks to execute). + return; + }; + } + }); } /// spawn a new slow job on a certain NAME IF it can run immediately #[allow(clippy::result_unit_err)] - pub fn try_run(&self, name: &str, f: F) -> Result + pub fn try_run(&self, name: &Name, f: F) -> Result where F: FnOnce() + Send + Sync + 'static, { let mut lock = self.internal.lock().expect("lock poisoned while try_run"); + let lock = &mut *lock; //spawn already queued - lock.spawn_queued(); + lock.spawn_queued(self); if lock.can_spawn(name) { - Ok(lock.spawn(name, f)) + Ok(lock.spawn(self, true, name, f)) } else { Err(()) } } - pub fn spawn(&self, name: &str, f: F) -> SlowJob + pub fn spawn(&self, name: &Name, f: F) -> SlowJob where F: FnOnce() + Send + Sync + 'static, { self.internal .lock() .expect("lock poisoned while spawn") - .spawn(name, f) + .spawn(self, true, name, f) } - pub fn cancel(&self, job: SlowJob) -> Result<(), SlowJob> { - let mut lock = self.internal.lock().expect("lock poisoned while cancel"); - if let Some(m) = lock.queue.get_mut(&job.name) { - let p = match m.iter().position(|p| p.id == job.id) { - Some(p) => p, - None => return Err(job), - }; - if m.remove(p).is_some() { - return Ok(()); - } - } - Err(job) + /// Spawn at the front of the queue, which is preferrable in some cases. 
+ pub fn spawn_front(&self, name: &Name, f: F) -> SlowJob + where + F: FnOnce() + Send + Sync + 'static, + { + self.internal + .lock() + .expect("lock poisoned while spawn") + .spawn(self, false, name, f) } - pub fn take_metrics(&self) -> HashMap> { + pub fn take_metrics(&self) -> HashMap> { self.internal .lock() .expect("lock poisoned while take_metrics") @@ -413,7 +600,7 @@ mod tests { .num_threads(pool_threads) .build() .unwrap(); - let pool = SlowJobPool::new(global_threads, metrics, Arc::new(threadpool)); + let pool = SlowJobPool::new(global_threads, metrics, threadpool); if foo != 0 { pool.configure("FOO", |x| x / foo); } diff --git a/server/src/chunk_generator.rs b/server/src/chunk_generator.rs index 3b07591a24..ec4ef06562 100644 --- a/server/src/chunk_generator.rs +++ b/server/src/chunk_generator.rs @@ -55,7 +55,7 @@ impl ChunkGenerator { v.insert(Arc::clone(&cancel)); let chunk_tx = self.chunk_tx.clone(); self.metrics.chunks_requested.inc(); - slowjob_pool.spawn("CHUNK_GENERATOR", move || { + slowjob_pool.spawn(&"CHUNK_GENERATOR", move || { let index = index.as_index_ref(); let payload = world .generate_chunk(index, key, || cancel.load(Ordering::Relaxed), Some(time)) diff --git a/server/src/lib.rs b/server/src/lib.rs index fbe324a263..37e5aac0da 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -293,8 +293,8 @@ impl Server { } { let pool = state.ecs_mut().write_resource::(); - pool.configure("CHUNK_GENERATOR", |n| n / 2 + n / 4); - pool.configure("CHUNK_SERIALIZER", |n| n / 2); + pool.configure(&"CHUNK_GENERATOR", |n| n / 2 + n / 4); + pool.configure(&"CHUNK_SERIALIZER", |n| n / 2); } state .ecs_mut() diff --git a/server/src/sys/chunk_serialize.rs b/server/src/sys/chunk_serialize.rs index a35b9ea6e5..d014ee0b5d 100644 --- a/server/src/sys/chunk_serialize.rs +++ b/server/src/sys/chunk_serialize.rs @@ -117,7 +117,7 @@ impl<'a> System<'a> for Sys { while chunks_iter.peek().is_some() { let chunks: Vec<_> = chunks_iter.by_ref().take(CHUNK_SIZE).collect(); let chunk_sender = chunk_sender.clone(); - slow_jobs.spawn("CHUNK_SERIALIZER", move || { + slow_jobs.spawn(&"CHUNK_SERIALIZER", move || { for (chunk, chunk_key, mut meta) in chunks { let msg = Client::prepare_chunk_update_msg( ServerGeneral::TerrainChunkUpdate { diff --git a/voxygen/Cargo.toml b/voxygen/Cargo.toml index c06c510a5c..93bc00c643 100644 --- a/voxygen/Cargo.toml +++ b/voxygen/Cargo.toml @@ -95,6 +95,7 @@ server = { package = "veloren-server", path = "../server", optional = true, defa assets_manager = {version = "0.8", features = ["ab_glyph"]} backtrace = "0.3.40" bincode = "1.3.1" +bumpalo = "3.10.0" chrono = { version = "0.4.19", features = ["serde"] } chumsky = "0.8" cpal = "0.13" diff --git a/voxygen/src/ecs/mod.rs b/voxygen/src/ecs/mod.rs index 8b7b647b38..fc019d0dbb 100644 --- a/voxygen/src/ecs/mod.rs +++ b/voxygen/src/ecs/mod.rs @@ -11,9 +11,9 @@ pub fn init(world: &mut World) { { let pool = world.read_resource::(); - pool.configure("IMAGE_PROCESSING", |n| n / 2); - pool.configure("FIGURE_MESHING", |n| n / 2); - pool.configure("TERRAIN_MESHING", |n| n / 2); + pool.configure(&"IMAGE_PROCESSING", |n| n / 2); + pool.configure(&"FIGURE_MESHING", |n| n / 4); + pool.configure(&"TERRAIN_MESHING", |n| n / 2); } // Voxygen event buses diff --git a/voxygen/src/hud/mod.rs b/voxygen/src/hud/mod.rs index 7f020361bb..777bc07d3d 100644 --- a/voxygen/src/hud/mod.rs +++ b/voxygen/src/hud/mod.rs @@ -4295,11 +4295,10 @@ impl Hud { // TODO: using a thread pool in the obvious way for speeding up map zoom results // in flickering 
artifacts, figure out a better way to make use of the // thread pool - let _pool = client.state().ecs().read_resource::(); + let pool = client.state().slow_job_pool(); self.ui.maintain( global_state.window.renderer_mut(), - None, - //Some(&pool), + Some(&pool), Some(proj_mat * view_mat * Mat4::translation_3d(-focus_off)), ); diff --git a/voxygen/src/lib.rs b/voxygen/src/lib.rs index 7df571364f..c71cada8a8 100644 --- a/voxygen/src/lib.rs +++ b/voxygen/src/lib.rs @@ -4,6 +4,7 @@ #![deny(clippy::clone_on_ref_ptr)] #![feature( array_methods, + array_windows, array_zip, bool_to_option, drain_filter, diff --git a/voxygen/src/menu/char_selection/ui/mod.rs b/voxygen/src/menu/char_selection/ui/mod.rs index ac8584cd23..e5d4a726cf 100644 --- a/voxygen/src/menu/char_selection/ui/mod.rs +++ b/voxygen/src/menu/char_selection/ui/mod.rs @@ -1728,12 +1728,13 @@ impl CharSelectionUi { pub fn maintain(&mut self, global_state: &mut GlobalState, client: &Client) -> Vec { let mut events = Vec::new(); let i18n = global_state.i18n.read(); + let pool = client.state().slow_job_pool(); let (mut messages, _) = self.ui.maintain( self.controls .view(&global_state.settings, client, &self.error, &i18n), global_state.window.renderer_mut(), - None, + Some(&pool), &mut global_state.clipboard, ); diff --git a/voxygen/src/mesh/greedy.rs b/voxygen/src/mesh/greedy.rs index 496dca8738..ef0574243d 100644 --- a/voxygen/src/mesh/greedy.rs +++ b/voxygen/src/mesh/greedy.rs @@ -9,7 +9,7 @@ type TodoRect = ( Vec3, ); -pub struct GreedyConfig { +pub struct GreedyConfig { pub data: D, /// The minimum position to mesh, in the coordinate system used /// for queries against the volume. @@ -31,6 +31,9 @@ pub struct GreedyConfig { /// the number of *horizontal* planes large enough to cover the whole /// chunk. pub greedy_size_cross: Vec3, + /// Given a position, return the full information for the voxel at that + /// position. + pub get_vox: FV, /// Given a position, return the AO information for the voxel at that /// position (0.0 - 1.0). pub get_ao: FA, @@ -104,7 +107,23 @@ fn guillotiere_size>(size: Vec2) -> guillotiere::Size { guillotiere::Size::new(size.x.into(), size.y.into()) } -/// Currently used by terrain/particles/figures +/// Currently used by terrain +pub fn terrain_config() -> guillotiere::AllocatorOptions { + // TODO: Collect information to see if we can choose a good value here. These + // current values were optimized for sprites, but we are using a + // different allocator for them so different values might be better + // here. + let large_size_threshold = /*16*//*32*/8; //256.min(min_max_dim / 2 + 1); + let small_size_threshold = /*4*//*4*/3; //33.min(large_size_threshold / 2 + 1); + + guillotiere::AllocatorOptions { + alignment: guillotiere::Size::new(1, 1), + small_size_threshold, + large_size_threshold, + } +} + +/// Currently used by particles/figures pub fn general_config() -> guillotiere::AllocatorOptions { // TODO: Collect information to see if we can choose a good value here. These // current values were optimized for sprites, but we are using a @@ -370,15 +389,16 @@ impl<'a, Allocator: AtlasAllocator> GreedyMesh<'a, Allocator> { /// Returns an estimate of the bounds of the current meshed model. /// /// For more information on the config parameter, see [GreedyConfig]. 
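// Illustrative, not part of the patch: with the new `get_vox` field the cross-section
// walker pre-fetches the voxel on each side of a candidate face and passes both values to
// `should_draw`, instead of `should_draw` re-reading the volume through a `delta` offset.
// A self-contained toy version of that contract, with `bool` standing in for the voxel
// type; the returned bool picks the face orientation.
fn face_between(from_solid: bool, to_solid: bool) -> Option<bool> {
    match (from_solid, to_solid) {
        // Solid behind, empty in front: draw a forward-facing face.
        (true, false) => Some(true),
        // Empty behind, solid in front: draw a backward-facing face.
        (false, true) => Some(false),
        // No visible boundary between two solids or two empties.
        _ => None,
    }
}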
- pub fn push( + pub fn push( &mut self, - config: GreedyConfig, + config: GreedyConfig, ) where + FV: for<'r> FnMut(&'r mut D, Vec3) -> V + 'a, FA: for<'r> FnMut(&'r mut D, Vec3) -> f32 + 'a, FL: for<'r> FnMut(&'r mut D, Vec3) -> f32 + 'a, FG: for<'r> FnMut(&'r mut D, Vec3) -> f32 + 'a, FO: for<'r> FnMut(&'r mut D, Vec3) -> bool + 'a, - FS: for<'r> FnMut(&'r mut D, Vec3, Vec3, Vec2>) -> Option<(bool, M)>, + FS: for<'r> FnMut(&'r mut D, Vec3, V, V, /*Vec3, */Vec2>) -> Option<(bool, M)>, FP: FnMut(Vec2, Vec2>, Vec3, Vec2>, Vec3, &M), FT: for<'r> FnMut(&'r mut D, Vec3, u8, u8, bool) -> [u8; 4] + 'a, { @@ -418,7 +438,7 @@ impl<'a, Allocator: AtlasAllocator> GreedyMesh<'a, Allocator> { pub fn max_size(&self) -> Vec2 { self.max_size } } -fn greedy_mesh<'a, M: PartialEq, D: 'a, FA, FL, FG, FO, FS, FP, FT, Allocator: AtlasAllocator>( +fn greedy_mesh<'a, M: PartialEq, D: 'a, V: 'a, FV, FA, FL, FG, FO, FS, FP, FT, Allocator: AtlasAllocator>( atlas: &mut Allocator, col_lights_size: &mut Vec2, max_size: Vec2, @@ -427,6 +447,7 @@ fn greedy_mesh<'a, M: PartialEq, D: 'a, FA, FL, FG, FO, FS, FP, FT, Allocator: A draw_delta, greedy_size, greedy_size_cross, + mut get_vox, get_ao, get_light, get_glow, @@ -434,14 +455,15 @@ fn greedy_mesh<'a, M: PartialEq, D: 'a, FA, FL, FG, FO, FS, FP, FT, Allocator: A mut should_draw, mut push_quad, make_face_texel, - }: GreedyConfig, + }: GreedyConfig, ) -> Box> where + FV: for<'r> FnMut(&'r mut D, Vec3) -> V + 'a, FA: for<'r> FnMut(&'r mut D, Vec3) -> f32 + 'a, FL: for<'r> FnMut(&'r mut D, Vec3) -> f32 + 'a, FG: for<'r> FnMut(&'r mut D, Vec3) -> f32 + 'a, FO: for<'r> FnMut(&'r mut D, Vec3) -> bool + 'a, - FS: for<'r> FnMut(&'r mut D, Vec3, Vec3, Vec2>) -> Option<(bool, M)>, + FS: for<'r> FnMut(&'r mut D, Vec3, V, V, /*Vec3, */Vec2>) -> Option<(bool, M)>, FP: FnMut(Vec2, Vec2>, Vec3, Vec2>, Vec3, &M), FT: for<'r> FnMut(&'r mut D, Vec3, u8, u8, bool) -> [u8; 4] + 'a, { @@ -451,18 +473,25 @@ where // x (u = y, v = z) greedy_mesh_cross_section( - Vec3::new(greedy_size.y, greedy_size.z, greedy_size_cross.x), - |pos| { + Vec3::new(greedy_size.z, greedy_size.y, greedy_size_cross.x), + #[inline(always)] |pos| { + let pos = draw_delta + Vec3::new(pos.z, pos.y, pos.x); + let delta = Vec3::unit_x(); + let from = get_vox(&mut data, pos - delta); + let to = get_vox(&mut data, pos); should_draw( &mut data, - draw_delta + Vec3::new(pos.z, pos.x, pos.y), - Vec3::unit_x(), - Vec2::new(Vec3::unit_y(), Vec3::unit_z()), + /* draw_delta + Vec3::new(pos.z, pos.x, pos.y), + Vec3::unit_x(), */ + pos, + from, + to, + Vec2::new(Vec3::unit_z(), Vec3::unit_y()), ) }, |pos, dim, &(faces_forward, ref meta)| { - let pos = Vec3::new(pos.z, pos.x, pos.y); - let uv = Vec2::new(Vec3::unit_y(), Vec3::unit_z()); + let pos = Vec3::new(pos.z, pos.y, pos.x); + let uv = Vec2::new(Vec3::unit_z(), Vec3::unit_y()); let norm = Vec3::unit_x(); let atlas_pos = add_to_atlas( atlas, @@ -479,8 +508,8 @@ where pos, dim, uv, - norm, - faces_forward, + -norm, + !faces_forward, meta, atlas_pos, |atlas_pos, dim, pos, draw_dim, norm, meta| { @@ -490,20 +519,27 @@ where }, ); - // y (u = z, v = x) + // y (u = x, v = z) greedy_mesh_cross_section( - Vec3::new(greedy_size.z, greedy_size.x, greedy_size_cross.y), - |pos| { + Vec3::new(greedy_size.x, greedy_size.z, greedy_size_cross.y), + #[inline(always)] |pos| { + let pos = draw_delta + Vec3::new(pos.x, pos.z, pos.y); + let delta = Vec3::unit_y(); + let from = get_vox(&mut data, pos - delta); + let to = get_vox(&mut data, pos); should_draw( &mut data, - draw_delta + 
Vec3::new(pos.y, pos.z, pos.x), - Vec3::unit_y(), - Vec2::new(Vec3::unit_z(), Vec3::unit_x()), + /* draw_delta + Vec3::new(pos.y, pos.z, pos.x), + Vec3::unit_y(), */ + pos, + from, + to, + Vec2::new(Vec3::unit_x(), Vec3::unit_z()), ) }, |pos, dim, &(faces_forward, ref meta)| { - let pos = Vec3::new(pos.y, pos.z, pos.x); - let uv = Vec2::new(Vec3::unit_z(), Vec3::unit_x()); + let pos = Vec3::new(pos.x, pos.z, pos.y); + let uv = Vec2::new(Vec3::unit_x(), Vec3::unit_z()); let norm = Vec3::unit_y(); let atlas_pos = add_to_atlas( atlas, @@ -520,8 +556,8 @@ where pos, dim, uv, - norm, - faces_forward, + -norm, + !faces_forward, meta, atlas_pos, |atlas_pos, dim, pos, draw_dim, norm, meta| { @@ -534,11 +570,18 @@ where // z (u = x, v = y) greedy_mesh_cross_section( Vec3::new(greedy_size.x, greedy_size.y, greedy_size_cross.z), - |pos| { + #[inline(always)] |pos| { + let pos = draw_delta + Vec3::new(pos.x, pos.y, pos.z); + let delta = Vec3::unit_z(); + let from = get_vox(&mut data, pos - delta); + let to = get_vox(&mut data, pos); should_draw( &mut data, - draw_delta + Vec3::new(pos.x, pos.y, pos.z), - Vec3::unit_z(), + /* draw_delta + Vec3::new(pos.x, pos.y, pos.z), + Vec3::unit_z(), */ + pos, + from, + to, Vec2::new(Vec3::unit_x(), Vec3::unit_y()), ) }, @@ -604,12 +647,10 @@ fn greedy_mesh_cross_section( let mut mask = &mut mask[0..dims.y * dims.x]; (0..dims.z + 1).for_each(|d| { // Compute mask - let mut posi = 0; - (0..dims.y).for_each(|j| { - (0..dims.x).for_each(|i| { + mask.chunks_exact_mut(dims.x).enumerate().for_each(|(j, mask)| { + mask.iter_mut().enumerate().for_each(|(i, mask)| { // NOTE: Safe because dims.z actually fits in a u16. - mask[posi] = draw_face(Vec3::new(i as i32, j as i32, d as i32)); - posi += 1; + *mask = draw_face(Vec3::new(i as i32, j as i32, d as i32)); }); }); /* mask.iter_mut().enumerate().for_each(|(posi, mask)| { @@ -620,12 +661,13 @@ fn greedy_mesh_cross_section( }); */ (0..dims.y).for_each(|j| { + let mask = &mut mask[j * dims.x..]; let mut i = 0; while i < dims.x { // Compute width (number of set x bits for this row and layer, starting at the // current minimum column). - if let Some(ori) = &mask[j * dims.x + i] { - let width = 1 + mask[j * dims.x + i + 1..j * dims.x + dims.x] + if let Some(ori) = &mask[i] { + let width = 1 + mask[i + 1..dims.x] .iter() .take_while(move |&mask| mask.as_ref() == Some(ori)) .count(); @@ -633,19 +675,20 @@ fn greedy_mesh_cross_section( // Compute height (number of rows having w set x bits for this layer, starting // at the current minimum column and row). let height = 1 - + (j + 1..dims.y) - .take_while(|h| { - mask[h * dims.x + i..h * dims.x + max_x] + + mask[dims.x..(dims.y - j) * dims.x/* + max_x + i*/] + .chunks_exact(dims.x) + .take_while(|mask| { + mask[i..max_x] .iter() .all(|mask| mask.as_ref() == Some(ori)) }) .count(); - let max_y = j + height; + let max_y = height; // Add quad. push_quads(Vec3::new(i, j, d), Vec2::new(width, height), ori); // Unset mask bits in drawn region, so we don't try to re-draw them. 
- (j..max_y).for_each(|l| { - mask[l * dims.x + i..l * dims.x + max_x] + mask[..max_y * dims.x].chunks_exact_mut(dims.x).for_each(|mask| { + mask[i..max_x] .iter_mut() .for_each(|mask| { *mask = None; diff --git a/voxygen/src/mesh/segment.rs b/voxygen/src/mesh/segment.rs index 416c12fed7..2219f153e8 100644 --- a/voxygen/src/mesh/segment.rs +++ b/voxygen/src/mesh/segment.rs @@ -62,12 +62,12 @@ where }; let get_glow = |_vol: &mut V, _pos: Vec3| 0.0; let get_opacity = |vol: &mut V, pos: Vec3| vol.get(pos).map_or(true, |vox| vox.is_empty()); - let should_draw = |vol: &mut V, pos: Vec3, delta: Vec3, uv| { - should_draw_greedy(pos, delta, uv, |vox| { + let should_draw = |vol: &mut V, pos: Vec3, from: Cell, to: Cell, /*delta: Vec3, */uv| { + should_draw_greedy(pos, from, to,/* delta, */uv/*, |vox| { vol.get(vox) .map(|vox| *vox) .unwrap_or_else(|_| Cell::empty()) - }) + } */) }; let create_opaque = |atlas_pos, pos, norm| { TerrainVertex::new_figure(atlas_pos, (pos + offs) * scale, norm, bone_idx) @@ -78,6 +78,11 @@ where draw_delta, greedy_size, greedy_size_cross, + get_vox: |vol: &mut V, vox| { + vol.get(vox) + .map(|vox| *vox) + .unwrap_or_else(|_| Cell::empty()) + }, get_ao: |_: &mut V, _: Vec3| 1.0, get_light, get_glow, @@ -204,8 +209,8 @@ where flat_get(flat, pos).get_color().unwrap_or_else(Rgb::zero) }; let get_opacity = move |flat: &mut _, pos: Vec3| flat_get(flat, pos).is_empty(); - let should_draw = move |flat: &mut _, pos: Vec3, delta: Vec3, uv| { - should_draw_greedy_ao(vertical_stripes, pos, delta, uv, |vox| flat_get(flat, vox)) + let should_draw = move |flat: &mut _, pos: Vec3, from: Cell, to: Cell, /*delta: Vec3, */uv| { + should_draw_greedy_ao(vertical_stripes, pos, from, to,/* delta, */uv/* , |vox| flat_get(flat, vox) */) }; // NOTE: Fits in i16 (much lower actually) so f32 is no problem (and the final // position, pos + mesh_delta, is guaranteed to fit in an f32). 
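// Illustrative sketch, not part of the patch: the mask changes in
// `greedy_mesh_cross_section` above replace manual `row * dims.x + column` indexing with
// `chunks_exact(_mut)`, which walks the same row-major buffer one row at a time. The
// hypothetical helper below clears a `width` x `height` region the same way the rewritten
// "unset mask bits" loop does, assuming the slice starts at the region's first row.
fn clear_region(mask: &mut [Option<u8>], row_len: usize, col: usize, width: usize, height: usize) {
    mask[..height * row_len]
        .chunks_exact_mut(row_len)
        .for_each(|row| row[col..col + width].iter_mut().for_each(|m| *m = None));
}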
@@ -219,6 +224,7 @@ where draw_delta, greedy_size, greedy_size_cross, + get_vox: move |flat: &mut _, vox| flat_get(flat, vox), get_ao: |_: &mut _, _: Vec3| 1.0, get_light, get_glow, @@ -292,12 +298,12 @@ where .unwrap_or_else(Rgb::zero) }; let get_opacity = |vol: &mut V, pos: Vec3| vol.get(pos).map_or(true, |vox| vox.is_empty()); - let should_draw = |vol: &mut V, pos: Vec3, delta: Vec3, uv| { - should_draw_greedy(pos, delta, uv, |vox| { + let should_draw = |vol: &mut V, pos: Vec3, from: Cell, to: Cell, /*delta: Vec3, */uv| { + should_draw_greedy(pos, from, to,/* delta, */uv/*, |vox| { vol.get(vox) .map(|vox| *vox) .unwrap_or_else(|_| Cell::empty()) - }) + }*/) }; let create_opaque = |_atlas_pos, pos: Vec3, norm| ParticleVertex::new(pos, norm); @@ -307,6 +313,11 @@ where draw_delta, greedy_size, greedy_size_cross, + get_vox: |vol: &mut V, vox| { + vol.get(vox) + .map(|vox| *vox) + .unwrap_or_else(|_| Cell::empty()) + }, get_ao: |_: &mut V, _: Vec3| 1.0, get_light, get_glow, @@ -333,12 +344,14 @@ where fn should_draw_greedy( pos: Vec3, - delta: Vec3, + from: Cell, + to: Cell, + /* delta: Vec3, */ _uv: Vec2>, - flat_get: impl Fn(Vec3) -> Cell, + /* flat_get: impl Fn(Vec3) -> Cell, */ ) -> Option<(bool, /* u8 */ ())> { - let from = flat_get(pos - delta); - let to = flat_get(pos); + /* let from = flat_get(pos - delta); + let to = flat_get(pos); */ let from_opaque = !from.is_empty(); if from_opaque != to.is_empty() { None @@ -352,12 +365,14 @@ fn should_draw_greedy( fn should_draw_greedy_ao( vertical_stripes: bool, pos: Vec3, - delta: Vec3, + from: Cell, + to: Cell, + /* delta: Vec3, */ _uv: Vec2>, - flat_get: impl Fn(Vec3) -> Cell, + /* flat_get: impl Fn(Vec3) -> Cell, */ ) -> Option<(bool, bool)> { - let from = flat_get(pos - delta); - let to = flat_get(pos); + /* let from = flat_get(pos - delta); + let to = flat_get(pos); */ let from_opaque = !from.is_empty(); if from_opaque != to.is_empty() { None diff --git a/voxygen/src/mesh/terrain.rs b/voxygen/src/mesh/terrain.rs index f1ad3e09ec..712b96e1bf 100644 --- a/voxygen/src/mesh/terrain.rs +++ b/voxygen/src/mesh/terrain.rs @@ -9,7 +9,7 @@ use crate::{ scene::terrain::BlocksOfInterest, }; use common::{ - terrain::Block, + terrain::{Block, TerrainChunk}, util::either_with, vol::{ReadVol, RectRasterableVol}, volumes::vol_grid_2d::{CachedVolGrid2d, VolGrid2d}, @@ -39,18 +39,33 @@ pub const MAX_LIGHT_DIST: i32 = SUNLIGHT as i32; type CalcLightFn = impl Fn(Vec3) -> f32 + 'static + Send + Sync; #[inline(always)] +/* #[allow(unsafe_code)] */ fn flat_get<'a>(flat: &'a Vec, w: i32, h: i32, d: i32) -> impl Fn(Vec3) -> Block + 'a { - let hd = h * d; - let flat = &flat[0..(w * hd) as usize]; + let wh = w * h; + let flat = &flat[0..(d * wh) as usize]; #[inline(always)] move |Vec3 { x, y, z }| { // z can range from -1..range.size().d + 1 let z = z + 1; - flat[(x * hd + y * d + z) as usize] + flat[((z * wh + y * w + x) as usize)] + /* unsafe { *flat.get_unchecked((z * wh + y * w + x) as usize) } */ /* match flat.get((x * hd + y * d + z) as usize).copied() { Some(b) => b, None => panic!("x {} y {} z {} d {} h {}", x, y, z, d, h), } */ } + + /* let hd = h * d; + let flat = &flat[0..(w * hd) as usize]; + #[inline(always)] move |Vec3 { x, y, z }| { + // z can range from -1..range.size().d + 1 + let z = z + 1; + /* flat[((x * hd + y * d + z) as usize)] */ + unsafe { *flat.get_unchecked((x * hd + y * d + z) as usize) } + /* match flat.get((x * hd + y * d + z) as usize).copied() { + Some(b) => b, + None => panic!("x {} y {} z {} d {} h {}", x, y, z, d, h), + } */ + } 
*/ } fn calc_light<'a, @@ -303,7 +318,7 @@ fn calc_light<'a, .copied() .unwrap_or(default_light); - if l != OPAQUE && l != UNKNOWN { + if /* l != OPAQUE && */l != UNKNOWN { l as f32 * SUNLIGHT_INV } else { 0.0 @@ -311,9 +326,11 @@ fn calc_light<'a, } } +type V = TerrainChunk; + #[allow(clippy::type_complexity)] #[inline(always)] -pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + 'static>( +pub fn generate_mesh<'a/*, V: RectRasterableVol + ReadVol + Debug + 'static*/>( vol: &'a VolGrid2d, (range, max_texture_size, boi): (Aabb, Vec2, &'a BlocksOfInterest), ) -> MeshGen< @@ -340,17 +357,38 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' let (w, h, d) = range.size().into_tuple(); // z can range from -1..range.size().d + 1 let d = d + 2; - { + + /// Representative block for air. + const AIR: Block = Block::air(common::terrain::sprite::SpriteKind::Empty); + /// Representative block for liquid. + /// + /// FIXME: Can you really skip meshing for general liquids? Probably not... + const LIQUID: Block = Block::water(common::terrain::sprite::SpriteKind::Empty); + /// Representtive block for solids. + /// + /// FIXME: Really hacky! + const OPAQUE: Block = Block::lava(common::terrain::sprite::SpriteKind::Empty); + + const ALL_OPAQUE: u8 = 0b1; + const ALL_LIQUID: u8 = 0b10; + const ALL_AIR: u8 = 0b100; + // For each horizontal slice of the chunk, we keep track of what kinds of blocks are in it. + // This allows us to compute limits after the fact, much more precisely than keeping track of a + // single intersection would; it also lets us skip homogeoneous slices entirely. + let mut row_kinds = vec![0; d as usize]; + /* { span!(_guard, "copy to flat array"); let hd = h * d; /*let flat = */{ + let mut arena = bumpalo::Bump::new(); + /* let mut volume = vol.cached(); */ - const AIR: Block = Block::air(common::terrain::sprite::SpriteKind::Empty); // TODO: Once we can manage it sensibly, consider using something like // Option instead of just assuming air. /*let mut */flat = vec![AIR; (w * /*h * d*/hd) as usize] /* Vec::with_capacity((w * /*h * d*/hd) as usize) */ ; + let row_kinds = &mut row_kinds[0..d as usize]; let flat = &mut flat/*.spare_capacity_mut()*/[0..(w * hd) as usize]; /* /*volume*/vol.iter().for_each(|(chunk_key, chunk)| { let corner = chunk.key_pos(chunk_key); @@ -368,6 +406,8 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' /* vol.iter().for_each(|(key, chonk)| { */ let chonk = &*chonk; let pos = vol.key_pos(key); + /* // Avoid diagonals. 
+ if pos.x != range.min.x + 1 && pos.y != range.min.y + 1 { return; } */ // Calculate intersection of Aabb and this chunk // TODO: should we do this more implicitly as part of the loop // TODO: this probably has to be computed in the chunk.for_each_in() as well @@ -390,6 +430,9 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' min: VolGrid2d::::chunk_offs(intersection_.min) + Vec3::new(0, 0, z_diff), max: VolGrid2d::::chunk_offs(intersection_.max) + Vec3::new(1, 1, z_diff + 1), }; + let z_diff = z_diff + chonk.get_min_z(); + let z_max = chonk.get_max_z() - chonk.get_min_z(); + let below = *chonk.below(); /* [[0 ..1]; [0 ..1]; [0..d]] [[0 ..1]; [1 ..h-1]; [0..d]] @@ -402,8 +445,46 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' [1,1; d] */ + let flat_chunk = chonk.make_flat(/*&stone_slice, &air_slice, */&arena); + let mut i = (x_diff * hd + y_diff * d) as usize; let hd_ = (intersection.size().h * d) as usize; + + let min_z_ = z_diff - intersection.min.z; + let max_z_ = z_max + z_diff - intersection.min.z; + let row_fill = if below.is_opaque() { + /* opaque_limits = opaque_limits + .map(|l| l.including(z_diff)) + .or_else(|| Some(Limits::from_value(z_diff))); */ + ALL_OPAQUE + } else if below.is_liquid() { + /* fluid_limits = fluid_limits + .map(|l| l.including(z_diff)) + .or_else(|| Some(Limits::from_value(z_diff))); */ + ALL_LIQUID + } else { + /* // Assume air + air_limits = air_limits + .map(|l| l.including(z_diff)) + .or_else(|| Some(Limits::from_value(z_diff))); */ + ALL_AIR + }; + + let skip_count = min_z_.max(0); + let take_count = (max_z_.min(d) - skip_count).max(0); + let skip_count = skip_count as usize; + let take_count = take_count as usize; + + // Fill the bottom rows with their below type. + row_kinds.iter_mut().take(skip_count).for_each(|row| { + *row |= row_fill; + }); + // Fill the top rows with air (we just assume that's the above type, since it + // always is in practice). + row_kinds.iter_mut().skip(skip_count + take_count).for_each(|row| { + *row |= ALL_AIR; + }); + // dbg!(pos, intersection_, intersection, range, flat_range, x_diff, y_diff, z_diff, y_rem, x_off, i); (intersection.min.x..intersection.max.x).for_each(|x| { let flat = &mut flat[i..i + /*intersection.size().y * intersection.size().z*/hd_]; @@ -414,7 +495,14 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' /* chonk.for_each_in(intersection, |pos_offset, block| { pos_offset.z += z_diff; }); */ - flat.into_iter().enumerate().for_each(|(z, flat)| { + + // intersection.min.z = range.min.z - 1 - range.min.z = -1 + // z_diff = chonk.get_min_z() - range.min.z + // min_z_ = chonk.get_min_z() - (range.min.z - 1) + // + // max_z_ = (chonk.get_max_z() - (range.min.z - 1)).min(d - skip_count) + flat[0..skip_count].fill(below); + flat.into_iter().zip(row_kinds.into_iter()).enumerate().skip(skip_count).take(take_count).for_each(|(z, (flat, row))| { let z = z as i32 + intersection.min.z; /* (intersection.min.z..intersection.max.z).for_each(|z| { */ /* let mut i = ((x_diff + (x - intersection.min.x)) * hd + (y_diff + (y - intersection.min.y)) * d + (z - intersection.min.z)) as usize; */ @@ -432,21 +520,34 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' // since it's not clear this will work properly with liquid. 
.unwrap_or(AIR); */ /* if let Ok(&block) = chonk.get(Vec3::new(x, y, z - z_diff)) */ - let block = chonk.get(Vec3::new(x, y, z - z_diff)).copied().unwrap_or(AIR); + let block_pos = Vec3::new(x, y, z - z_diff); + let block = /* if block_pos.z < 0 { + *chonk.below() + } else if block_pos.z >= z_max { + *chonk.above() + } else */{ + let grp_id = common::terrain::TerrainSubChunk::grp_idx(block_pos) as usize; + let rel_id = common::terrain::TerrainSubChunk::rel_idx(block_pos) as usize; + flat_chunk[grp_id][rel_id] + }; + /* let block = chonk.get(block_pos).copied().unwrap_or(AIR); */ { - if block.is_opaque() { - opaque_limits = opaque_limits + *row |= if block.is_opaque() { + /* opaque_limits = opaque_limits .map(|l| l.including(z)) - .or_else(|| Some(Limits::from_value(z))); + .or_else(|| Some(Limits::from_value(z))); */ + ALL_OPAQUE } else if block.is_liquid() { - fluid_limits = fluid_limits + /* fluid_limits = fluid_limits .map(|l| l.including(z)) - .or_else(|| Some(Limits::from_value(z))); + .or_else(|| Some(Limits::from_value(z))); */ + ALL_LIQUID } else { // Assume air - air_limits = air_limits + /* air_limits = air_limits .map(|l| l.including(z)) - .or_else(|| Some(Limits::from_value(z))); + .or_else(|| Some(Limits::from_value(z))); */ + ALL_AIR }; /*flat[i] = block*//*unsafe { flat.get_unchecked_mut(i) }*//*flat[i].write(block);*/ /* flat.write(block); */ @@ -466,9 +567,188 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' // i += x_off; i += hd as usize; }); + + arena.reset(); /* }); */ }); }); + + // Compute limits (TODO: see if we can skip this, or make it more precise?). + row_kinds.iter().enumerate().for_each(|(z, row)| { + let z = z as i32 /* + intersection.min.z */- 1; + if row & ALL_OPAQUE != 0 { + opaque_limits = opaque_limits + .map(|l| l.including(z)) + .or_else(|| Some(Limits::from_value(z))); + } + if row & ALL_LIQUID != 0 { + fluid_limits = fluid_limits + .map(|l| l.including(z)) + .or_else(|| Some(Limits::from_value(z))); + } + if row & ALL_AIR != 0 { + air_limits = air_limits + .map(|l| l.including(z)) + .or_else(|| Some(Limits::from_value(z))); + } + }); + } + /* unsafe { flat.set_len((w * hd) as usize); } */ + } */ + { + span!(_guard, "copy to flat array"); + let wh = w * h; + { + let mut arena = bumpalo::Bump::new(); + + flat = vec![AIR; (d * wh) as usize]; + let row_kinds = &mut row_kinds[0..d as usize]; + let flat = &mut flat[0..(d * wh) as usize]; + let flat_range = Aabb { + min: range.min - Vec3::new(0, 0, 1), + max: range.max - Vec3::new(1, 1, 0), + }; + let min_chunk_key = vol.pos_key(flat_range.min); + let max_chunk_key = vol.pos_key(flat_range.max); + (min_chunk_key.x..max_chunk_key.x + 1).for_each(|key_x| { + (min_chunk_key.y..max_chunk_key.y + 1).for_each(|key_y| { + let key = Vec2::new(key_x, key_y); + let chonk = vol.get_key(key).expect("All keys in range must have chonks."); + let chonk = &*chonk; + let pos = vol.key_pos(key); + let intersection_ = flat_range.intersection(Aabb { + min: pos.with_z(i32::MIN), + max: (pos + VolGrid2d::::chunk_size().map(|e| e as i32) - 1).with_z(i32::MAX), + }); + + // Map intersection into chunk coordinates + let x_diff = intersection_.min.x - flat_range.min.x; + let y_diff = intersection_.min.y - flat_range.min.y; + let z_diff = -range.min.z; + /* let y_rem = flat_range.max.y - intersection_.max.y; + let x_off = ((y_diff + y_rem) * d) as usize; */ + + let intersection = Aabb { + min: VolGrid2d::::chunk_offs(intersection_.min) + Vec3::new(0, 0, z_diff), + max: VolGrid2d::::chunk_offs(intersection_.max) 
+ Vec3::new(1, 1, z_diff + 1), + }; + let z_diff = z_diff + chonk.get_min_z(); + let z_max = chonk.get_max_z() - chonk.get_min_z(); + let below = *chonk.below(); + + let flat_chunk = chonk.make_flat(&arena); + + let min_z_ = z_diff - intersection.min.z; + let max_z_ = z_max + z_diff - intersection.min.z; + + let row_fill = if below.is_opaque() { + ALL_OPAQUE + } else if below.is_liquid() { + ALL_LIQUID + } else { + ALL_AIR + }; + + let skip_count = min_z_.max(0); + let take_count = (max_z_.min(d) - skip_count).max(0); + let skip_count = skip_count as usize; + let take_count = take_count as usize; + + row_kinds.iter_mut().take(skip_count).for_each(|row| { + *row |= row_fill; + }); + row_kinds.iter_mut().skip(skip_count + take_count).for_each(|row| { + *row |= ALL_AIR; + }); + + // dbg!(pos, intersection_, intersection, range, flat_range, x_diff, y_diff, z_diff, y_rem, x_off, i); + flat.chunks_exact_mut(wh as usize).take(skip_count).for_each(|flat| { + flat.chunks_exact_mut(w as usize).skip(y_diff as usize).take((intersection.max.y - intersection.min.y) as usize).for_each(|flat| { + flat.into_iter().skip(x_diff as usize).take((intersection.max.x - intersection.min.x) as usize).for_each(|flat| { + *flat = below; + }); + }); + }); + + flat.chunks_exact_mut(wh as usize).zip(row_kinds.into_iter()).enumerate().skip(skip_count).take(take_count).for_each(|(z, (flat, row_))| { + let mut row = *row_; + let z = z as i32 + intersection.min.z - z_diff; + flat.chunks_exact_mut(w as usize).skip(y_diff as usize).enumerate().take((intersection.max.y - intersection.min.y) as usize).for_each(|(y, flat)| { + let y = y as i32 + intersection.min.y; + flat.into_iter().skip(x_diff as usize).enumerate().take((intersection.max.x - intersection.min.x) as usize).for_each(|(x, flat)| { + let x = x as i32 + intersection.min.x; + let block_pos = Vec3::new(x, y, z); + let block = { + let grp_id = common::terrain::TerrainSubChunk::grp_idx(block_pos) as usize; + let rel_id = common::terrain::TerrainSubChunk::rel_idx(block_pos) as usize; + flat_chunk[grp_id][rel_id] + }; + { + row |= if block.is_opaque() { + ALL_OPAQUE + } else if block.is_liquid() { + ALL_LIQUID + } else { + ALL_AIR + }; + *flat = block; + } + }); + }); + *row_ = row; + }); + /* (intersection.min.z..intersection.max.z).for_each(|z| { + let flat = &mut flat[i..i + /*intersection.size().y * intersection.size().z*/hd_]; + flat.chunks_exact_mut(d as usize).enumerate().for_each(|(y, flat)| { + let y = y as i32 + intersection.min.y; + flat[0..skip_count].fill(below); + flat.into_iter().zip(row_kinds.into_iter()).enumerate().skip(skip_count).take(take_count).for_each(|(z, (flat, row))| { + let z = z as i32 + intersection.min.z; + /* let mut i = ((x_diff + (x - intersection.min.x)) * hd + (y_diff + (y - intersection.min.y)) * d + (z - intersection.min.z)) as usize; */ + let block_pos = Vec3::new(x, y, z - z_diff); + let block = { + let grp_id = common::terrain::TerrainSubChunk::grp_idx(block_pos) as usize; + let rel_id = common::terrain::TerrainSubChunk::rel_idx(block_pos) as usize; + flat_chunk[grp_id][rel_id] + }; + { + *row |= if block.is_opaque() { + ALL_OPAQUE + } else if block.is_liquid() { + ALL_LIQUID + } else { + ALL_AIR + }; + *flat = block; + } + }); + }); + i += hd as usize; + }); */ + + arena.reset(); + }); + }); + + // Compute limits (TODO: see if we can skip this, or make it more precise?). 
+ row_kinds.iter().enumerate().for_each(|(z, row)| { + let z = z as i32 /* + intersection.min.z */- 1; + if row & ALL_OPAQUE != 0 { + opaque_limits = opaque_limits + .map(|l| l.including(z)) + .or_else(|| Some(Limits::from_value(z))); + } + if row & ALL_LIQUID != 0 { + fluid_limits = fluid_limits + .map(|l| l.including(z)) + .or_else(|| Some(Limits::from_value(z))); + } + if row & ALL_AIR != 0 { + air_limits = air_limits + .map(|l| l.including(z)) + .or_else(|| Some(Limits::from_value(z))); + } + }); } /* unsafe { flat.set_len((w * hd) as usize); } */ } @@ -479,12 +759,12 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' let (z_start, z_end) = match (air_limits, fluid_limits, opaque_limits) { (Some(air), Some(fluid), Some(opaque)) => { let air_fluid = air.intersection(fluid); - if let Some(intersection) = air_fluid.filter(|limits| limits.min + 1 == limits.max) { + /* if let Some(intersection) = air_fluid.filter(|limits| limits.min + 1 == limits.max) { // If there is a planar air-fluid boundary, just draw it directly and avoid // redundantly meshing the whole fluid volume, then interect the ground-fluid // and ground-air meshes to make sure we don't miss anything. either_with(air.intersection(opaque), fluid.intersection(opaque), Limits::union) - } else { + } else */{ // Otherwise, do a normal three-way intersection. air.three_way_intersection(fluid, opaque) } @@ -564,22 +844,10 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' let max_size = max_texture_size; assert!(z_end >= z_start); - let greedy_size = Vec3::new(range.size().w - 2, range.size().h - 2, z_end - z_start + 1); - // NOTE: Terrain sizes are limited to 32 x 32 x 16384 (to fit in 24 bits: 5 + 5 - // + 14). FIXME: Make this function fallible, since the terrain - // information might be dynamically generated which would make this hard - // to enforce. - assert!(greedy_size.x <= 32 && greedy_size.y <= 32 && greedy_size.z <= 16384); - // NOTE: Cast is safe by prior assertion on greedy_size; it fits into a u16, - // which always fits into a f32. - let max_bounds: Vec3 = greedy_size.as_::(); - // NOTE: Cast is safe by prior assertion on greedy_size; it fits into a u16, - // which always fits into a usize. 
- let greedy_size = greedy_size.as_::(); - let greedy_size_cross = Vec3::new(greedy_size.x - 1, greedy_size.y - 1, greedy_size.z); - let draw_delta = Vec3::new(1, 1, z_start); let flat_get = flat_get(&flat, w, h, d); + let get_color = + #[inline(always)] |_: &mut (), pos: Vec3| flat_get(pos).get_color().unwrap_or_else(Rgb::zero); let get_light = #[inline(always)] |_: &mut (), pos: Vec3| { if flat_get(pos).is_opaque() { 0.0 @@ -591,25 +859,44 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' if flat_get(pos).is_opaque() { 0.0 } else { 1.0 } }; let get_glow = #[inline(always)] |_: &mut (), pos: Vec3| glow(pos + range.min); - let get_color = - #[inline(always)] |_: &mut (), pos: Vec3| flat_get(pos).get_color().unwrap_or_else(Rgb::zero); let get_opacity = #[inline(always)] |_: &mut (), pos: Vec3| !flat_get(pos).is_opaque(); - let should_draw = #[inline(always)] |_: &mut (), pos: Vec3, delta: Vec3, _uv| { - should_draw_greedy(pos, delta, #[inline(always)] |pos| flat_get(pos)) + let should_draw = #[inline(always)] |_: &mut (), /*pos*/_: Vec3, from: Block, to: Block,/* delta: Vec3,*/ _uv: Vec2>| { + should_draw_greedy(/*pos, */from, to/*, delta, #[inline(always)] |pos| flat_get(pos) */) }; + + let mut greedy = + GreedyMesh::::new(max_size, greedy::terrain_config()); + let greedy_size = Vec3::new(range.size().w - 2, range.size().h - 2, z_end - z_start + 1); + let mesh_delta = Vec3::new(0.0, 0.0, (z_start + range.min.z) as f32); + let max_bounds: Vec3 = greedy_size.as_::(); + let mut do_draw_greedy = #[inline(always)] |z_start: i32, z_end: i32| { + // dbg!(range.min, z_start, z_end); + let greedy_size = Vec3::new(range.size().w - 2, range.size().h - 2, z_end - z_start + 1); + // NOTE: Terrain sizes are limited to 32 x 32 x 16384 (to fit in 24 bits: 5 + 5 + // + 14). FIXME: Make this function fallible, since the terrain + // information might be dynamically generated which would make this hard + // to enforce. + assert!(greedy_size.x <= 32 && greedy_size.y <= 32 && greedy_size.z <= 16384); + // NOTE: Cast is safe by prior assertion on greedy_size; it fits into a u16, + // which always fits into a f32. + // NOTE: Cast is safe by prior assertion on greedy_size; it fits into a u16, + // which always fits into a usize. + let greedy_size = greedy_size.as_::(); + let greedy_size_cross = Vec3::new(greedy_size.x - 1, greedy_size.y - 1, greedy_size.z); + let draw_delta = Vec3::new(1, 1, z_start); + // NOTE: Conversion to f32 is fine since this i32 is actually in bounds for u16. 
let mesh_delta = Vec3::new(0.0, 0.0, (z_start + range.min.z) as f32); let create_opaque = - #[inline(always)] |atlas_pos, pos, norm, meta| TerrainVertex::new(atlas_pos, pos + mesh_delta, norm, meta); - let create_transparent = #[inline(always)] |_atlas_pos, pos, norm| FluidVertex::new(pos + mesh_delta, norm); + #[inline(always)] |atlas_pos, pos: Vec3, norm, meta| TerrainVertex::new(atlas_pos, pos + mesh_delta, norm, meta); + let create_transparent = #[inline(always)] |_atlas_pos: Vec2, pos: Vec3, norm: Vec3| FluidVertex::new(pos + mesh_delta, norm); - let mut greedy = - GreedyMesh::::new(max_size, greedy::general_config()); greedy.push(GreedyConfig { data: (), draw_delta, greedy_size, greedy_size_cross, + get_vox: #[inline(always)] |_: &mut (), pos| flat_get(pos), get_ao, get_light, get_glow, @@ -643,6 +930,66 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' TerrainVertex::make_col_light(light, glow, get_color(data, pos), ao) }, }); + }; + + let mut z_start = z_start; + let mut row_iter = row_kinds.iter().enumerate(); + let mut needs_draw = false; + row_kinds.array_windows().enumerate().skip(z_start as usize).take((z_end - z_start + 1) as usize).for_each(|(z, &[from_row, to_row])| { + let z = z as i32; + // Evaluate a "canonicalized" greedy mesh algorithm on this pair of row kinds, to see if we're + // about to switch (requiring us to draw a surface). + let from = match from_row { + ALL_AIR => Some(AIR), + ALL_LIQUID => Some(LIQUID), + ALL_OPAQUE => Some(OPAQUE), + _ => None, + }; + let to = match to_row { + ALL_AIR => Some(AIR), + ALL_LIQUID => Some(LIQUID), + ALL_OPAQUE => Some(OPAQUE), + _ => None, + }; + // There are two distinct cases: + let (from, to) = match from.zip(to) { + None => { + // At least one of the two rows is not homogeneous. + if !needs_draw { + // The from is homogeneous (since !needs_draw), but the to is not. We should + // start a new draw without drawing the old volume. + z_start = z; + needs_draw = true; + } + // Otherwise, we were in the middle of drawing the previous row, so we just extend + // the current draw. + return; + }, + Some(pair) => pair, + }; + let old_needs_draw = needs_draw; + // The from *and* to are both homogeneous, so we can compute whether we should draw + // a surface between them. + needs_draw = should_draw_greedy(from, to).is_some(); + if needs_draw == old_needs_draw { + // We continue the current draw (or nondraw). + return; + } + if old_needs_draw { + // old_needs_draw is true, so we need to start a fresh draw and end an earlier draw, + // drawing the existing volume (needs_draw is false). + do_draw_greedy(z_start, z - 1); + } + // We always must start a fresh draw. + z_start = z; + }); + // Finally, draw any remaining terrain, if necessary. + if needs_draw { + /* if z_start != z_end { + dbg!(range.min, z_start, z_end); + } */ + do_draw_greedy(z_start, z_end); + } let min_bounds = mesh_delta; let bounds = Aabb { @@ -668,24 +1015,44 @@ pub fn generate_mesh<'a, V: RectRasterableVol + ReadVol + Debug + ' /// [scene::terrain::Terrain::skip_remesh]. 
#[inline(always)] fn should_draw_greedy( - pos: Vec3, - delta: Vec3, - flat_get: impl Fn(Vec3) -> Block, + /* pos: Vec3, */ + from: Block, + to: Block, + /* delta: Vec3, + flat_get: impl Fn(Vec3) -> Block, */ ) -> Option<(bool, FaceKind)> { - let from = flat_get(pos - delta); - let to = flat_get(pos); + /* let from = flat_get(pos - delta); + let to = flat_get(pos); */ // Don't use `is_opaque`, because it actually refers to light transmission - let from_filled = from.is_filled(); + /* let from = from.kind() as u8 & 0xF; + let to = to.kind() as u8 & 0xF; + (from ^ to) | ((from.overflowing_sub(1) > to.overflowing_sub(1)) as u8 << 2) */ + use common::terrain::BlockKind; + match (from.kind(), to.kind()) { + (BlockKind::Air, BlockKind::Water) => Some((false, FaceKind::Fluid)), + (BlockKind::Water, BlockKind::Air) => Some((true, FaceKind::Fluid)), + (BlockKind::Air, BlockKind::Air) | (BlockKind::Water, BlockKind::Water) => None, + (BlockKind::Air, _) => Some((false, FaceKind::Opaque(false))), + (_, BlockKind::Air) => Some((true, FaceKind::Opaque(false))), + (BlockKind::Water, _) => Some((false, FaceKind::Opaque(true))), + (_, BlockKind::Water) => Some((true, FaceKind::Opaque(true))), + _ => None, + } + /* let from_filled = from.is_filled(); if from_filled == to.is_filled() { // Check the interface of liquid and non-tangible non-liquid (e.g. air). - let from_liquid = from.is_liquid(); - if from_liquid == to.is_liquid() || /*from.is_filled() || to.is_filled()*/from_filled { + if from_filled { None } else { - // While liquid is not culled, we still try to keep a consistent orientation as - // we do for land; if going from liquid to non-liquid, - // forwards-facing; otherwise, backwards-facing. - Some((from_liquid, FaceKind::Fluid)) + let from_liquid = /*from.is_liquid()*/!from.is_air(); + if from_liquid == /*to.is_liquid()*/!to.is_air()/*from.is_filled() || to.is_filled()*//* from_filled */ { + None + } else { + // While liquid is not culled, we still try to keep a consistent orientation as + // we do for land; if going from liquid to non-liquid, + // forwards-facing; otherwise, backwards-facing. + Some((from_liquid, FaceKind::Fluid)) + } } } else { // If going from unfilled to filled, backward facing; otherwise, forward @@ -693,12 +1060,12 @@ fn should_draw_greedy( Some(( from_filled, FaceKind::Opaque(if from_filled { - to.is_liquid() + /* to.is_liquid() */!to.is_air() } else { - from.is_liquid() + /* from.is_liquid() */!from.is_air() }), )) - } + } */ } /// 1D Aabr diff --git a/voxygen/src/scene/figure/cache.rs b/voxygen/src/scene/figure/cache.rs index ed5153528d..49709610d1 100644 --- a/voxygen/src/scene/figure/cache.rs +++ b/voxygen/src/scene/figure/cache.rs @@ -336,7 +336,7 @@ where let manifests = self.manifests.clone(); let slot_ = Arc::clone(&slot); - slow_jobs.spawn("FIGURE_MESHING", move || { + slow_jobs.spawn(&"FIGURE_MESHING", move || { // First, load all the base vertex data. 
            let meshes = ::bone_meshes(&key, &manifests, extra);
diff --git a/voxygen/src/scene/terrain.rs b/voxygen/src/scene/terrain.rs
index 33ef6cd8a1..36e1cd0bb1 100644
--- a/voxygen/src/scene/terrain.rs
+++ b/voxygen/src/scene/terrain.rs
@@ -35,7 +35,7 @@ use core::{f32, fmt::Debug, marker::PhantomData, time::Duration};
 use crossbeam_channel as channel;
 use enum_iterator::IntoEnumIterator;
 use guillotiere::AtlasAllocator;
-use hashbrown::HashMap;
+use hashbrown::{hash_map, HashMap};
 use serde::Deserialize;
 use std::sync::{
     atomic::{AtomicU64, Ordering},
@@ -103,11 +103,25 @@ pub struct TerrainChunkData {
     frustum_last_plane_index: u8,
 }
 
-#[derive(Copy, Clone)]
+#[derive(Clone, Copy, Eq, PartialEq)]
+enum ChunkWorkerStatus {
+    /// The worker is not currently active.
+    Invalid,
+    /// The worker is currently active and the chunk it is working on is up to date.
+    Active,
+    /// The worker was once active for this chunk (it may or may not currently be
+    /// active), but the chunk is stale and needs remeshing, so we want to process
+    /// it as soon as possible to prioritize the update.
+    Stale,
+}
+
+#[derive(Clone)]
 struct ChunkMeshState {
     pos: Vec2,
-    started_tick: u64,
-    is_worker_active: bool,
+    /// Only ever set from the main thread. Always increasing, and it's okay to read a stale value
+    /// from the worker threads, so we can use Relaxed loads and stores.
+    started_tick: Arc,
+    status: ChunkWorkerStatus,
     // If this is set, we skip the actual meshing part of the update.
     skip_remesh: bool,
 }
@@ -226,11 +240,13 @@ impl assets::Asset for SpriteSpec {
     const EXTENSION: &'static str = "ron";
 }
 
+type V = TerrainChunk;
+
 /// Function executed by worker threads dedicated to chunk meshing.
 /// skip_remesh is either None (do the full remesh, including recomputing the
 /// light map), or Some((light_map, glow_map)).
-fn mesh_worker + RectRasterableVol + ReadVol + Debug + 'static>(
+fn mesh_worker/* + RectRasterableVol + ReadVol + Debug + 'static>*/(
     pos: Vec2,
     z_bounds: (f32, f32),
     skip_remesh: Option<(LightMapFn, LightMapFn)>,
@@ -630,7 +646,7 @@ impl SpriteRenderContext {
     }
 }
 
-impl Terrain {
+impl/**/ Terrain {
     pub fn new(
         client: &Client,
         renderer: &mut Renderer,
@@ -754,7 +770,7 @@ impl Terrain {
                 ..Default::default()
             },
         );
-        slowjob.spawn("TERRAIN_MESHING", move || {
+        slowjob.spawn(&"IMAGE_PROCESSING", move || {
            // Construct the next atlas on a separate thread. If it doesn't get sent, it means
            // the original channel was dropped, which implies the terrain scene data no longer
            // exists, so we can just drop the result in that case.
@@ -781,20 +797,20 @@ impl Terrain {
         Ok(col_light)
     }
 
-    fn remove_chunk_meta(&mut self, _pos: Vec2, chunk: &TerrainChunkData) {
+    fn remove_chunk_meta(atlas: &mut AtlasAllocator, _pos: Vec2, chunk: &TerrainChunkData) {
         // No need to free the allocation if the chunk is not allocated in the current
         // atlas, since we don't bother tracking it at that point.
if let Some(col_lights) = chunk.col_lights_alloc { - self.atlas.deallocate(col_lights); + atlas.deallocate(col_lights); } /* let (zmin, zmax) = chunk.z_bounds; self.z_index_up.remove(Vec3::from(zmin, pos.x, pos.y)); self.z_index_down.remove(Vec3::from(zmax, pos.x, pos.y)); */ } - fn insert_chunk(&mut self, pos: Vec2, chunk: TerrainChunkData) { - if let Some(old) = self.chunks.insert(pos, chunk) { - self.remove_chunk_meta(pos, &old); + fn insert_chunk(chunks: &mut HashMap, TerrainChunkData>, atlas: &mut AtlasAllocator, pos: Vec2, chunk: TerrainChunkData) { + if let Some(old) = chunks.insert(pos, chunk) { + Self::remove_chunk_meta(atlas, pos, &old); } /* let (zmin, zmax) = chunk.z_bounds; self.z_index_up.insert(Vec3::from(zmin, pos.x, pos.y)); @@ -803,13 +819,15 @@ impl Terrain { fn remove_chunk(&mut self, pos: Vec2) { if let Some(chunk) = self.chunks.remove(&pos) { - self.remove_chunk_meta(pos, &chunk); + Self::remove_chunk_meta(&mut self.atlas, pos, &chunk); // Temporarily remember dead chunks for shadowing purposes. self.shadow_chunks.push((pos, chunk)); } - if let Some(_todo) = self.mesh_todo.remove(&pos) { - //Do nothing on todo mesh removal. + if let Some(todo) = self.mesh_todo.remove(&pos) { + // Update the old starting tick to u64::MAX so any chunk workers that haven't started + // yet can be canceled. + todo.started_tick.store(u64::MAX, Ordering::Relaxed); } } @@ -945,7 +963,11 @@ impl Terrain { for i in -1..2 { for j in -1..2 { if i != 0 || j != 0 { - self.mesh_todo.remove(&(pos + Vec2::new(i, j))); + if let Some(todo) = self.mesh_todo.remove(&(pos + Vec2::new(i, j))) { + // Update the old starting tick to u64::MAX so any chunk workers that + // haven't started yet can be canceled. + todo.started_tick.store(u64::MAX, Ordering::Relaxed); + } } } } @@ -980,9 +1002,10 @@ impl Terrain { for j in -1..2 { let pos = pos + Vec2::new(i, j); - if !(self.chunks.contains_key(&pos) || self.mesh_todo.contains_key(&pos)) - || modified - { + let entry = self.mesh_todo.entry(pos); + let done_meshing = self.chunks.contains_key(&pos); + let in_progress = done_meshing || matches!(entry, hash_map::Entry::Occupied(_)); + if modified || !in_progress { let mut neighbours = true; for i in -1..2 { for j in -1..2 { @@ -992,12 +1015,22 @@ impl Terrain { } if neighbours { - self.mesh_todo.insert(pos, ChunkMeshState { + let todo = entry.or_insert_with(|| ChunkMeshState { pos, - started_tick: current_tick, - is_worker_active: false, + started_tick: Arc::new(AtomicU64::new(current_tick)), + status: ChunkWorkerStatus::Invalid, skip_remesh: false, }); + + todo.skip_remesh = false; + todo.started_tick.store(current_tick, Ordering::Relaxed); + todo.status = if done_meshing/* || todo.status != ChunkWorkerStatus::Invalid*/ { + // Make the new status stale, to make sure the chunk gets updated + // promptly. + ChunkWorkerStatus::Stale + } else { + ChunkWorkerStatus::Invalid + }; } } } @@ -1077,13 +1110,14 @@ impl Terrain { } } if neighbours { + let done_meshing = self.chunks.contains_key(&neighbour_chunk_pos); let todo = self.mesh_todo .entry(neighbour_chunk_pos) - .or_insert(ChunkMeshState { + .or_insert_with(|| ChunkMeshState { pos: neighbour_chunk_pos, - started_tick: current_tick, - is_worker_active: false, + started_tick: Arc::new(AtomicU64::new(current_tick)), + status: ChunkWorkerStatus::Invalid, skip_remesh, }); @@ -1094,8 +1128,15 @@ impl Terrain { // since otherwise the active remesh is computing new lighting values // that we don't have yet. 
todo.skip_remesh &= skip_remesh; - todo.is_worker_active = false; - todo.started_tick = current_tick; + todo.started_tick.store(current_tick, Ordering::Relaxed); + todo.status = if done_meshing/* || todo.status != ChunkWorkerStatus::Invalid*/ { + // This chunk is now stale, and was already meshed before, so we + // want to update it as soon as possible to make its update take + // effect. + ChunkWorkerStatus::Stale + } else { + ChunkWorkerStatus::Invalid + }; } } } @@ -1116,20 +1157,20 @@ impl Terrain { let mut todo = self .mesh_todo .values_mut() - .filter(|todo| !todo.is_worker_active) + .filter(|todo| todo.status != ChunkWorkerStatus::Active) // TODO: BinaryHeap .collect::>(); todo.sort_unstable_by_key(|todo| { ( (todo.pos.as_::() * TerrainChunk::RECT_SIZE.as_::()) .distance_squared(mesh_focus_pos), - todo.started_tick, + todo.started_tick.load(Ordering::Relaxed), ) }); let slowjob = scene_data.state.slow_job_pool(); for (todo, chunk) in todo.into_iter() - .filter(|todo| !todo.is_worker_active) + /* .filter(|todo| todo.status != ChunkWorkerStatus::Active) */ /* .min_by_key(|todo| ((todo.pos.as_::() * TerrainChunk::RECT_SIZE.as_::()).distance_squared(mesh_focus_pos), todo.started_tick)) */ // Find a reference to the actual `TerrainChunk` we're meshing ./*and_then*/filter_map(|todo| { @@ -1144,9 +1185,9 @@ impl Terrain { })?)) }) { - if self.mesh_todos_active.load(Ordering::Relaxed) > meshing_cores * 8 { + /* if self.mesh_todos_active.load(Ordering::Relaxed) > /* meshing_cores * 16 */CHUNKS_PER_SECOND as u64 / 60 { break; - } + } */ // like ambient occlusion and edge elision, we also need the borders // of the chunk's neighbours too (hence the `- 1` and `+ 1`). @@ -1174,13 +1215,16 @@ impl Terrain { }; // The region to actually mesh + let mesh_filter = |pos: &Vec2| + pos.x == todo.pos.x && pos.y <= todo.pos.y || + pos.y == todo.pos.y && pos.x <= todo.pos.x; let min_z = volume .iter() - .filter(|(pos, _)| pos.x <= todo.pos.x && pos.y <= todo.pos.y) + .filter(|(pos, _)| mesh_filter(pos)) .fold(i32::MAX, |min, (_, chunk)| chunk.get_min_z().min(min)); let max_z = volume .iter() - .filter(|(pos, _)| pos.x <= todo.pos.x && pos.y <= todo.pos.y) + .filter(|(pos, _)| mesh_filter(pos)) .fold(i32::MIN, |max, (_, chunk)| chunk.get_max_z().max(max)); let aabb = Aabb { @@ -1200,7 +1244,8 @@ impl Terrain { .map(|chunk| (Arc::clone(&chunk.light_map), Arc::clone(&chunk.glow_map))); // Queue the worker thread. - let started_tick = todo.started_tick; + let started_tick_ = Arc::clone(&todo.started_tick); + let started_tick = started_tick_.load(Ordering::Relaxed); let sprite_data = Arc::clone(&self.sprite_data); let sprite_config = Arc::clone(&self.sprite_config); let cnt = Arc::clone(&self.mesh_todos_active); @@ -1209,8 +1254,14 @@ impl Terrain { let create_instances = renderer.create_instances_lazy(); let create_locals = renderer.create_terrain_bound_locals(); cnt.fetch_add(1, Ordering::Relaxed); - slowjob - .spawn("TERRAIN_MESHING", move || { + let job = move || { + // Since this loads when the task actually *runs*, rather than when it's + // queued, it provides us with a good opportunity to check whether the chunk + // should be canceled. We might miss updates, but that's okay, since canceling + // is just an optimization. + let started_tick_ = started_tick_.load(Ordering::Relaxed); + if started_tick_ <= started_tick { + // This meshing job was not canceled. 
let sprite_data = sprite_data; let _ = send.send(mesh_worker( pos, @@ -1228,9 +1279,17 @@ impl Terrain { create_instances, create_locals, )); - cnt.fetch_sub(1, Ordering::Relaxed); - }); - todo.is_worker_active = true; + } + cnt.fetch_sub(1, Ordering::Relaxed); + }; + if todo.status == ChunkWorkerStatus::Stale { + // The chunk was updated unexpectedly, so insert at the front, not the back, to see + // the update as soon as possible. + slowjob.spawn_front(&"TERRAIN_MESHING", job); + } else { + slowjob.spawn(&"TERRAIN_MESHING", job); + } + todo.status = ChunkWorkerStatus::Active; } drop(terrain); drop(guard); @@ -1243,16 +1302,21 @@ impl Terrain { let recv_count = scene_data.state.get_delta_time() * CHUNKS_PER_SECOND + self.mesh_recv_overflow; self.mesh_recv_overflow = recv_count.fract(); + let mesh_recv = &self.mesh_recv; let incoming_chunks = - std::iter::from_fn(|| self.mesh_recv.recv_timeout(Duration::new(0, 0)).ok()) - .take(recv_count.floor() as usize) - .collect::>(); // Avoid ownership issue + std::iter::from_fn(|| mesh_recv.try_recv().ok()) + .take(recv_count.floor() as usize); for response in incoming_chunks { match self.mesh_todo.get(&response.pos) { // It's the mesh we want, insert the newly finished model into the terrain model // data structure (convert the mesh to a model first of course). - Some(todo) if response.started_tick <= todo.started_tick => { - let started_tick = todo.started_tick; + Some(todo) => { + let started_tick = todo.started_tick.load(Ordering::Relaxed); + if response.started_tick > started_tick { + // Chunk must have been removed, or it was spawned on an old tick. Drop + // the mesh since it's either out of date or no longer needed. + continue; + } let sprite_instances = response.sprite_instances; @@ -1331,7 +1395,7 @@ impl Terrain { load_time, )]); - self.insert_chunk(response.pos, TerrainChunkData { + Self::insert_chunk(&mut self.chunks, &mut self.atlas, response.pos, TerrainChunkData { load_time, opaque_model: mesh.opaque_model, fluid_model: mesh.fluid_model, @@ -1360,12 +1424,11 @@ impl Terrain { } if response.started_tick == started_tick { + // This was the latest worker for this chunk, so we don't need to worry + // about canceling any later tasks. self.mesh_todo.remove(&response.pos); } }, - // Chunk must have been removed, or it was spawned on an old tick. Drop the mesh - // since it's either out of date or no longer needed. - Some(_todo) => {}, None => {}, } } diff --git a/voxygen/src/ui/graphic/mod.rs b/voxygen/src/ui/graphic/mod.rs index 1ece067e4f..defb2c4d64 100644 --- a/voxygen/src/ui/graphic/mod.rs +++ b/voxygen/src/ui/graphic/mod.rs @@ -10,7 +10,7 @@ use crate::{ use common::{figure::Segment, slowjob::SlowJobPool}; use guillotiere::{size2, SimpleAtlasAllocator}; use hashbrown::{hash_map::Entry, HashMap}; -use image::{DynamicImage, RgbaImage}; +use image::{DynamicImage, GenericImageView, RgbaImage}; use pixel_art::resize_pixel_art; use slab::Slab; use std::{hash::Hash, sync::Arc}; @@ -411,6 +411,20 @@ fn draw_graphic( // Short-circuit spawning a job on the threadpool for blank graphics Some(Graphic::Blank) => None, Some(inner) => { + // HACK: Only use the pool for "large" graphics (greater than 32x32 source images, + // which is the size of the minimap images). + // + // FIXME: Proper flickering solution. 
+ let pool = if let Graphic::Image(image, _) = inner { + let (w, h) = image.dimensions(); + if w.saturating_mul(h) > 256 * 256 { + pool + } else { + None + } + } else { + pool + }; keyed_jobs .spawn(pool, (graphic_id, dims), || { let inner = inner.clone(); diff --git a/voxygen/src/ui/keyed_jobs.rs b/voxygen/src/ui/keyed_jobs.rs index 042c99e80c..0e7a1dee05 100644 --- a/voxygen/src/ui/keyed_jobs.rs +++ b/voxygen/src/ui/keyed_jobs.rs @@ -63,7 +63,7 @@ impl Key // which means that it completed while we tried to GC its pending // struct, which means that we'll GC it in the next cycle, so ignore // the error in this collection. - let _ = pool.cancel(job); + let _ = job.cancel(); } } fresh @@ -89,7 +89,7 @@ impl Key // approximating that let tx = self.tx.clone(); let f = f(); - let job = pool.spawn(self.name, move || { + let job = pool.spawn(&self.name, move || { let v = f(&k); let _ = tx.send((k, v)); }); diff --git a/world/src/layer/cave.rs b/world/src/layer/cave.rs index ae91d523ec..5073ec650e 100644 --- a/world/src/layer/cave.rs +++ b/world/src/layer/cave.rs @@ -600,7 +600,7 @@ fn write_column( for z in bedrock..z_range.end { let wpos = wpos2d.with_z(z); let mut try_spawn_entity = false; - canvas.map(wpos, |_block| { + canvas.set(wpos, { if z < z_range.start - 4 && !void_below { Block::new(BlockKind::Lava, Rgb::new(255, 65, 0)) } else if basalt > 0.0 @@ -844,7 +844,7 @@ fn write_column( { Block::air(sprite) } else { - get_mushroom(wpos, rng).unwrap_or(Block::air(SpriteKind::Empty)) + /*get_mushroom(wpos, rng).unwrap_or(*/Block::air(SpriteKind::Empty)/*)*/ } });
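
Note on the chunk-cancelation scheme introduced in scene/terrain.rs above: each ChunkMeshState now shares an Arc<AtomicU64> started tick with its queued worker; the main thread bumps that tick (or stores u64::MAX when the chunk is removed), and the worker re-reads it when it actually runs, skipping the expensive meshing if it has been superseded. The patch notes that Relaxed ordering suffices because cancelation is only an optimization. Below is a minimal, self-contained sketch of that pattern; the names (MeshTodo, queue_mesh_job) are hypothetical and not Veloren APIs, and a plain std::thread stands in for the SlowJobPool.

use std::sync::{
    atomic::{AtomicU64, Ordering},
    Arc,
};
use std::thread;

// Hypothetical stand-in for ChunkMeshState: only the shared tick matters here.
struct MeshTodo {
    started_tick: Arc<AtomicU64>,
}

// Queue a job that checks, at run time, whether it has been superseded.
fn queue_mesh_job(todo: &MeshTodo, spawn: impl FnOnce(Box<dyn FnOnce() + Send>)) {
    // Tick observed when the job is queued.
    let queued_tick = todo.started_tick.load(Ordering::Relaxed);
    let shared_tick = Arc::clone(&todo.started_tick);
    spawn(Box::new(move || {
        // Re-read the tick when the job actually runs. Relaxed is enough because
        // cancelation is only an optimization; a stale read just means redundant work.
        if shared_tick.load(Ordering::Relaxed) <= queued_tick {
            // Not canceled or superseded: do the expensive meshing work here.
        }
        // Otherwise the chunk was removed (tick set to u64::MAX) or re-queued on a
        // newer tick, so we skip the work entirely.
    }));
}

fn main() {
    let todo = MeshTodo {
        started_tick: Arc::new(AtomicU64::new(1)),
    };
    // "Queue" the job but run it later, as a thread pool would.
    let mut queued: Option<Box<dyn FnOnce() + Send>> = None;
    queue_mesh_job(&todo, |job| queued = Some(job));
    // The chunk is removed (or re-queued on a newer tick) before the worker runs...
    todo.started_tick.store(u64::MAX, Ordering::Relaxed);
    // ...so when the pool finally executes the job, it skips the meshing work.
    thread::spawn(queued.take().unwrap()).join().unwrap();
}

The comparison in the sketch (current tick <= tick captured at queue time) mirrors the check the patched worker closure performs before calling mesh_worker, and storing u64::MAX corresponds to the cancelation added where entries are dropped from mesh_todo.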