Changes to wgpu to hopefully improve scalability?

This commit is contained in:
Joshua Yanovski 2022-08-23 00:54:08 -07:00
parent 9863670169
commit 5a5f2b391f
12 changed files with 69 additions and 47 deletions

10
Cargo.lock generated
View File

@@ -1614,7 +1614,7 @@ dependencies = [
[[package]]
name = "egui_wgpu_backend"
version = "0.8.0"
source = "git+https://github.com/hasenbanck/egui_wgpu_backend.git?rev=63a002c6a9b6c016e45806dd065864431caab621#63a002c6a9b6c016e45806dd065864431caab621"
source = "git+https://github.com/pythonesque/egui_wgpu_backend.git?rev=7a811c5f8915a1486f477ed7d8907d22579c6b17#7a811c5f8915a1486f477ed7d8907d22579c6b17"
dependencies = [
"bytemuck",
"epi",
@@ -7566,7 +7566,7 @@ dependencies = [
[[package]]
name = "wgpu"
version = "0.8.0"
source = "git+https://github.com/pythonesque/wgpu.git?rev=b30d573c9aca0787f4ff7aba8bb0db7f823b7064#b30d573c9aca0787f4ff7aba8bb0db7f823b7064"
source = "git+https://github.com/pythonesque/wgpu.git?rev=3858992e25d494fe7544ec3064221bbdafd5a547#3858992e25d494fe7544ec3064221bbdafd5a547"
dependencies = [
"arrayvec 0.5.2",
"js-sys",
@@ -7586,7 +7586,7 @@ dependencies = [
[[package]]
name = "wgpu-core"
version = "0.8.0"
source = "git+https://github.com/pythonesque/wgpu.git?rev=b30d573c9aca0787f4ff7aba8bb0db7f823b7064#b30d573c9aca0787f4ff7aba8bb0db7f823b7064"
source = "git+https://github.com/pythonesque/wgpu.git?rev=3858992e25d494fe7544ec3064221bbdafd5a547#3858992e25d494fe7544ec3064221bbdafd5a547"
dependencies = [
"arrayvec 0.5.2",
"bitflags",
@@ -7617,7 +7617,7 @@ dependencies = [
[[package]]
name = "wgpu-profiler"
version = "0.4.0"
source = "git+https://github.com/Imberflur/wgpu-profiler?tag=wgpu-0.8#b156eb145bc223386ef344860d9b33b3c181650c"
source = "git+https://github.com/pythonesque/wgpu-profiler?rev=b6b16da077a1aca2fe3a2ceaa3bdcc1bdeb11d74#b6b16da077a1aca2fe3a2ceaa3bdcc1bdeb11d74"
dependencies = [
"futures",
"wgpu",
@@ -7626,7 +7626,7 @@ dependencies = [
[[package]]
name = "wgpu-types"
version = "0.8.0"
source = "git+https://github.com/pythonesque/wgpu.git?rev=b30d573c9aca0787f4ff7aba8bb0db7f823b7064#b30d573c9aca0787f4ff7aba8bb0db7f823b7064"
source = "git+https://github.com/pythonesque/wgpu.git?rev=3858992e25d494fe7544ec3064221bbdafd5a547#3858992e25d494fe7544ec3064221bbdafd5a547"
dependencies = [
"bitflags",
"serde",

View File

@@ -123,7 +123,10 @@ nativeBuildInputs = ["pkg-config"]
vek = { git = "https://github.com/yoanlcq/vek.git", rev = "84d5cb65841d46599a986c5477341bea4456be26" }
# patch wgpu so we can use wgpu-profiler crate
# wgpu = { git = "https://github.com/gfx-rs/wgpu.git", rev = "a92b8549a8e2cb9dac781bafc5ed32828f3caf46" }
wgpu = { git = "https://github.com/pythonesque/wgpu.git", rev = "b30d573c9aca0787f4ff7aba8bb0db7f823b7064" }
wgpu = { git = "https://github.com/pythonesque/wgpu.git", rev = "3858992e25d494fe7544ec3064221bbdafd5a547" }
# wgpu = { path = "../wgpu/wgpu" }
# wgpu-core = { path = "../wgpu/wgpu-core" }
# wgpu-types = { path = "../wgpu/wgpu-types" }
# # use the latest fixes in naga (remove when updates trickle down to wgpu-rs)
# naga = { git = "https://github.com/gfx-rs/naga.git", rev = "3a0f0144112ff621dd7f731bf455adf6cab19164" }

View File

@@ -3,7 +3,7 @@
(
caverns: false, // TODO: Disabled by default until cave overhaul
caves: true,
caves: false,
rocks: true,
shrubs: true,
trees: true,

View File

@@ -56,7 +56,9 @@ voxygen-egui = {package = "veloren-voxygen-egui", path = "egui", optional = true
# Graphics
winit = {version = "0.25.0", features = ["serde"]}
wgpu = { version = "=0.8.0", features = ["trace", "cross"] }
wgpu-profiler = { git = "https://github.com/Imberflur/wgpu-profiler", tag = "wgpu-0.8" }
# wgpu-profiler = { git = "https://github.com/Imberflur/wgpu-profiler", tag = "wgpu-0.8" }
# wgpu-profiler = { path = "../../wgpu-profiler" }
wgpu-profiler = { git = "https://github.com/pythonesque/wgpu-profiler", rev = "b6b16da077a1aca2fe3a2ceaa3bdcc1bdeb11d74" }
bytemuck = { version="1.4", features=["derive"] }
shaderc = "0.8.0"
# cmake crate used by shaderc, newer version 0.1.46 uses a new cmake command that is not available in our CI cmake version
@@ -75,7 +77,9 @@ keyboard-keynames = { git = "https://gitlab.com/Frinksy/keyboard-keynames.git",
# EGUI
egui = {version = "0.12", optional = true }
egui_wgpu_backend = {git = "https://github.com/hasenbanck/egui_wgpu_backend.git", rev = "63a002c6a9b6c016e45806dd065864431caab621", optional = true }
# egui_wgpu_backend = {git = "https://github.com/hasenbanck/egui_wgpu_backend.git", rev = "63a002c6a9b6c016e45806dd065864431caab621", optional = true }
# egui_wgpu_backend = { path = "../../egui_wgpu_backend", optional = true }
egui_wgpu_backend = {git = "https://github.com/pythonesque/egui_wgpu_backend.git", rev = "7a811c5f8915a1486f477ed7d8907d22579c6b17", optional = true }
egui_winit_platform = {version = "0.8", optional = true }
# ECS

View File

@@ -1004,7 +1004,9 @@ pub fn generate_mesh<'a/*, V: RectRasterableVol<Vox = Block> + ReadVol + Debug +
// Allocate the fresh mesh.
let mut col_lights = create_texture(col_lights_alloc_size);
let col_lights_size = col_lights.as_mut().map(|col_lights| {
finalize(bytemuck::cast_slice_mut(&mut col_lights.get_mapped_mut(0, col_lights.len())))
let slice = col_lights.get_mapped_mut(0, col_lights.len());
let mut buf = slice.get_mapped_range_mut();
finalize(bytemuck::cast_slice_mut(&mut buf))
}).unwrap_or(Vec2::broadcast(0));
(
opaque_mesh,

View File

@@ -49,14 +49,14 @@ impl<T: Copy + Pod> Buffer<T> {
///
/// NOTE: Will panic if the buffer was not explicitly mapped before this (without being
/// unmapped), either directly or via [Buffer::new_mapped].
pub fn get_mapped_mut(&self, offset: usize, len: usize) -> wgpu::BufferViewMut<'_> {
pub fn get_mapped_mut(&mut self, offset: usize, len: usize) -> wgpu::BufferSliceMut<'_> {
/* if !vals.is_empty() {
let contents = bytemuck::cast_slice(vals); */
let size_ty = std::mem::size_of::<T>() as u64;
let offset = offset as u64 * size_ty;
let size = /*vals.len()*/len as u64 * size_ty;
/* bytemuck::cast_slice_mut(&mut */self.buf.slice(offset..offset + size).get_mapped_range_mut()/* ) */
/* bytemuck::cast_slice_mut(&mut */self.buf.slice_mut(offset..offset + size)/*.get_mapped_range_mut()*//* ) */
/* .copy_from_slice(contents);
} */
}
@@ -70,7 +70,7 @@ impl<T: Copy + Pod> Buffer<T> {
/// NOTE: Queue is not *explicitly* used here, but it is implicitly used during the unmap
/// (within wgpu internals) when mapped at creation, and requires acquiring a lock on it,
/// so it's left in the API to deter people from using it when the queue isn't available.
pub fn unmap(&self, _queue: &wgpu::Queue/* , vals: &[T], offset: usize */) {
pub fn unmap(&mut self/*, _queue: &wgpu::Queue/* , vals: &[T], offset: usize */*/) {
/* if !vals.is_empty() {
let contents = bytemuck::cast_slice(vals);
@@ -126,3 +126,7 @@ impl<T: Copy + Pod> std::ops::Deref for DynamicBuffer<T> {
fn deref(&self) -> &Self::Target { &self.0 }
}
impl<T: Copy + Pod> std::ops::DerefMut for DynamicBuffer<T> {
fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 }
}

View File

@@ -39,14 +39,14 @@ impl<T: Copy + Pod> Consts<T> {
/// Get the GPU-side mapped slice represented by this constant handle, if it was previously
/// memory mapped.
pub fn get_mapped_mut(&self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferViewMut<'_> {
pub fn get_mapped_mut(&mut self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferSliceMut<'_> {
self.buf.get_mapped_mut(offset, len)
}
/// Unmaps the GPU-side handle represented by this constant handle, if it was previously
/// memory-mapped.
pub fn unmap(&self, queue: &wgpu::Queue) {
self.buf.unmap(queue);
pub fn unmap(&mut self, queue: &wgpu::Queue) {
self.buf.unmap(/*queue*/);
}
pub fn buf(&self) -> &wgpu::Buffer { &self.buf.buf }

View File

@@ -21,14 +21,14 @@ impl<T: Copy + Pod> Instances<T> {
/// Get the GPU-side mapped slice represented by this instances buffer, if it was previously
/// memory mapped.
pub fn get_mapped_mut(&self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferViewMut<'_> {
pub fn get_mapped_mut(&mut self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferSliceMut<'_> {
self.buf.get_mapped_mut(offset, len)
}
/// Unmaps the GPU-side handle represented by this instances buffer, if it was previously
/// memory-mapped.
pub fn unmap(&self, queue: &wgpu::Queue) {
self.buf.unmap(queue);
pub fn unmap(&mut self/*, queue: &wgpu::Queue*/) {
self.buf.unmap(/*queue*/);
}
// TODO: count vs len naming scheme??

View File

@@ -55,14 +55,14 @@ impl<V: Vertex> Model<V> {
/// Get the GPU-side mapped slice represented by this model handle, if it was previously
/// memory mapped.
pub fn get_mapped_mut(&self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferViewMut<'_> {
pub fn get_mapped_mut(&mut self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferSliceMut<'_> {
self.vbuf.get_mapped_mut(offset, len)
}
/// Unmaps the GPU-side handle represented by this model handle, if it was previously
/// memory-mapped.
pub fn unmap(&self, queue: &wgpu::Queue) {
self.vbuf.unmap(queue);
pub fn unmap(&mut self/*, queue: &wgpu::Queue*/) {
self.vbuf.unmap(/*queue*/);
}
/// Create a model with a slice of a portion of this model to send to the

View File

@@ -512,8 +512,8 @@ impl Renderer {
// the previous frame.
let (maintain_tx, maintain_rx) = channel::bounded(0);
let device_ = Arc::clone(&device);
/* std::thread::spawn(move || {
/* let device_ = Arc::clone(&device);
std::thread::spawn(move || {
// Maintain each time we are requested to do so, until the renderer dies.
// Additionally, accepts CPU->GPU tasks containing updates to perform that need to lock
// the device (but not necessarily the queue?). This is a hopefully temporary measure
@@ -806,6 +806,7 @@ impl Renderer {
pub fn maintain(&self) {
if self.is_minimized {
self.queue.submit(std::iter::empty());
// self.device.poll(wgpu::Maintain::Poll);
}
// If the send fails, we can (generally) assume it's because the channel is out of
@@ -1286,7 +1287,7 @@ impl Renderer {
}
/// Unmaps a set of memory mapped consts.
pub fn unmap_consts<T: Copy + bytemuck::Pod>(&self, consts: &Consts<T>) {
pub fn unmap_consts<T: Copy + bytemuck::Pod>(&self, consts: &mut Consts<T>) {
consts.unmap(&self.queue)
}
@@ -1317,8 +1318,8 @@ impl Renderer {
}
/// Unmaps a set of memory mapped instances.
pub fn unmap_instances<T: Copy + bytemuck::Pod>(&self, instances: &Instances<T>) {
instances.unmap(&self.queue)
pub fn unmap_instances<T: Copy + bytemuck::Pod>(&self, instances: &mut Instances<T>) {
instances.unmap(/*&self.queue*/)
}
/// Update the expected index length to be large enough for a quad vertex buffer with this many
@@ -1412,16 +1413,17 @@ impl Renderer {
let create_model = self.create_model_lazy_base(usage);
move |mesh| {
let len = mesh.vertices().len();
let model = create_model(len)?;
model.get_mapped_mut(0, len)
let mut model = create_model(len)?;
let slice = model.get_mapped_mut(0, len);
slice.get_mapped_range_mut()
.copy_from_slice(bytemuck::cast_slice(mesh.vertices()));
Some(model)
}
}
/// Unmaps a memory mapped model.
pub fn unmap_model<V: Vertex>(&self, model: &Model<V>) {
model.unmap(&self.queue);
pub fn unmap_model<V: Vertex>(&self, model: &mut Model<V>) {
model.unmap(/*&self.queue*/);
}
/// Create a new dynamic model with the specified size.

View File

@@ -131,7 +131,7 @@ impl TakeScreenshot {
}
}
fn download_and_handle_internal(self) {
fn download_and_handle_internal(mut self) {
// Calculate padded bytes per row
let padded_bytes_per_row = padded_bytes_per_row(self.width, self.bytes_per_pixel);
let singlethread_rt = match tokio::runtime::Builder::new_current_thread().build() {
@@ -143,7 +143,7 @@ impl TakeScreenshot {
};
// Map buffer
let buffer_slice = self.buffer.slice(..);
let mut buffer_slice = self.buffer.slice_mut(..);
let buffer_map_future = buffer_slice.map_async(wgpu::MapMode::Read);
// Wait on buffer mapping

View File

@@ -389,10 +389,11 @@ fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug +
start = range.end;
range
});
let sprite_instances = create_instances(instance_ranges.iter().map(|range| range.len()).sum());
let mut sprite_instances = create_instances(instance_ranges.iter().map(|range| range.len()).sum());
if start > 0 {
sprite_instances
.get_mapped_mut(0, sprite_instances.count())
let slice = sprite_instances
.get_mapped_mut(0, sprite_instances.count());
slice.get_mapped_range_mut()
.array_chunks_mut::<{ core::mem::size_of::<SpriteInstance>() }>()
.zip(instances.into_iter().flatten()).for_each(|(dst, src)| {
// FIXME: cast doesn't work because bytemuck::cast isn't const generic-ified
@@ -400,10 +401,16 @@ fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug +
// *dst = bytemuck::cast(src);
dst.copy_from_slice(bytemuck::cast_slice(&[src]));
});
/* sprite_instances.unmap(); */
}
(instance_ranges, sprite_instances)
},
mesh,
mesh/* : mesh.map(|mut mesh| {
mesh.opaque_model.as_mut().map(|model| model.unmap());
mesh.fluid_model.as_mut().map(|model| model.unmap());
mesh.col_lights_info.0.as_mut().map(|model| model.unmap());
mesh
})*/,
blocks_of_interest,
started_tick,
}
@@ -1307,7 +1314,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
let create_fluid = renderer.create_model_lazy(wgpu::BufferUsage::VERTEX);
let create_instances = renderer.create_instances_lazy();
/* let create_locals = renderer.create_terrain_bound_locals(); */
let create_texture = renderer./*create_texture_raw*/create_model_lazy_base(wgpu::BufferUsage::COPY_SRC);
let create_texture = renderer./*create_texture_raw*/create_model_lazy_base(wgpu::BufferUsage::COPY_SRC/* | wgpu::BufferUsage::MAP_WRITE */);
/* cnt.fetch_add(1, Ordering::Relaxed); */
let job = move || {
// Since this loads when the task actually *runs*, rather than when it's
@@ -1368,15 +1375,15 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
if max_recv_count > 0 {
// Construct a buffer for all the chunks we're going to process in this frame. There might
// be some unused slots, which is fine.
let locals = /*Arc::new(*/renderer.create_consts_mapped(wgpu::BufferUsage::empty(), max_recv_count as usize)/*)*/;
let mut locals_buffer = locals.get_mapped_mut(0, locals.len());
let mut locals = /*Arc::new(*/renderer.create_consts_mapped(wgpu::BufferUsage::empty(), max_recv_count as usize)/*)*/;
let mut locals_bound = renderer.create_terrain_bound_locals(&locals/*, locals_offset */);
let mut locals_buffer = locals.get_mapped_mut(0, locals.len());
let mut encoder = renderer.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Update textures."),
});
for (locals_offset, (response, locals_buffer)) in incoming_chunks.zip(locals_buffer.array_chunks_mut::<{ core::mem::size_of::<TerrainLocals>() }>()).enumerate() {
for (locals_offset, (mut response, locals_buffer)) in incoming_chunks.zip(locals_buffer.get_mapped_range_mut().array_chunks_mut::<{ core::mem::size_of::<TerrainLocals>() }>()).enumerate() {
let pos = response.pos;
let response_started_tick = response.started_tick;
match self.mesh_todo.get(&pos) {
@@ -1401,7 +1408,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
.unwrap_or(current_time as f32);
// TODO: Allocate new atlas on allocation failure.
let (tex, tex_size) = mesh.col_lights_info;
let tex = tex.expect("The mesh exists, so the texture should too.");
let mut tex = tex.expect("The mesh exists, so the texture should too.");
let atlas = &mut self.atlas;
let chunks = &mut self.chunks;
let col_lights = &mut self.col_lights;
@@ -1449,10 +1456,10 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
// lock as of wgpu 0.8.1).
//
// FIXME: When we upgrade wgpu, reconsider all this.
renderer.unmap_instances(&response.sprite_instances.1);
mesh.opaque_model.as_ref().map(|model| renderer.unmap_model(model));
mesh.fluid_model.as_ref().map(|model| renderer.unmap_model(model));
renderer.unmap_model(&tex);
renderer.unmap_instances(&mut response.sprite_instances.1);
mesh.opaque_model.as_mut().map(|model| renderer.unmap_model(model));
mesh.fluid_model.as_mut().map(|model| renderer.unmap_model(model));
renderer.unmap_model(&mut tex);
// NOTE: Cast is safe since the origin was a u16.
let atlas_offs = Vec2::new(
@@ -1556,7 +1563,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
}
// Drop the memory mapping and unmap the locals.
drop(locals_buffer);
renderer.unmap_consts(&locals);
renderer.unmap_consts(&mut locals);
// Drop buffer on background thread.
slowjob.spawn(&"TERRAIN_DROP", move || { drop(locals); });
/* // TODO: Delay submission, don't just submit immediately out of convenience!