Changes to wgpu to hopefully improve scalability?

This commit is contained in:
Joshua Yanovski 2022-08-23 00:54:08 -07:00
parent 9863670169
commit 5a5f2b391f
12 changed files with 69 additions and 47 deletions

10
Cargo.lock generated
View File

@@ -1614,7 +1614,7 @@ dependencies = [
[[package]]
name = "egui_wgpu_backend"
version = "0.8.0"
source = "git+https://github.com/hasenbanck/egui_wgpu_backend.git?rev=63a002c6a9b6c016e45806dd065864431caab621#63a002c6a9b6c016e45806dd065864431caab621"
source = "git+https://github.com/pythonesque/egui_wgpu_backend.git?rev=7a811c5f8915a1486f477ed7d8907d22579c6b17#7a811c5f8915a1486f477ed7d8907d22579c6b17"
dependencies = [
"bytemuck",
"epi",
@@ -7566,7 +7566,7 @@ dependencies = [
[[package]]
name = "wgpu"
version = "0.8.0"
source = "git+https://github.com/pythonesque/wgpu.git?rev=b30d573c9aca0787f4ff7aba8bb0db7f823b7064#b30d573c9aca0787f4ff7aba8bb0db7f823b7064"
source = "git+https://github.com/pythonesque/wgpu.git?rev=3858992e25d494fe7544ec3064221bbdafd5a547#3858992e25d494fe7544ec3064221bbdafd5a547"
dependencies = [
"arrayvec 0.5.2",
"js-sys",
@@ -7586,7 +7586,7 @@ dependencies = [
[[package]]
name = "wgpu-core"
version = "0.8.0"
source = "git+https://github.com/pythonesque/wgpu.git?rev=b30d573c9aca0787f4ff7aba8bb0db7f823b7064#b30d573c9aca0787f4ff7aba8bb0db7f823b7064"
source = "git+https://github.com/pythonesque/wgpu.git?rev=3858992e25d494fe7544ec3064221bbdafd5a547#3858992e25d494fe7544ec3064221bbdafd5a547"
dependencies = [
"arrayvec 0.5.2",
"bitflags",
@@ -7617,7 +7617,7 @@ dependencies = [
[[package]]
name = "wgpu-profiler"
version = "0.4.0"
source = "git+https://github.com/Imberflur/wgpu-profiler?tag=wgpu-0.8#b156eb145bc223386ef344860d9b33b3c181650c"
source = "git+https://github.com/pythonesque/wgpu-profiler?rev=b6b16da077a1aca2fe3a2ceaa3bdcc1bdeb11d74#b6b16da077a1aca2fe3a2ceaa3bdcc1bdeb11d74"
dependencies = [
"futures",
"wgpu",
@@ -7626,7 +7626,7 @@ dependencies = [
[[package]]
name = "wgpu-types"
version = "0.8.0"
source = "git+https://github.com/pythonesque/wgpu.git?rev=b30d573c9aca0787f4ff7aba8bb0db7f823b7064#b30d573c9aca0787f4ff7aba8bb0db7f823b7064"
source = "git+https://github.com/pythonesque/wgpu.git?rev=3858992e25d494fe7544ec3064221bbdafd5a547#3858992e25d494fe7544ec3064221bbdafd5a547"
dependencies = [
"bitflags",
"serde",

View File

@@ -123,7 +123,10 @@ nativeBuildInputs = ["pkg-config"]
vek = { git = "https://github.com/yoanlcq/vek.git", rev = "84d5cb65841d46599a986c5477341bea4456be26" }
# patch wgpu so we can use wgpu-profiler crate
# wgpu = { git = "https://github.com/gfx-rs/wgpu.git", rev = "a92b8549a8e2cb9dac781bafc5ed32828f3caf46" }
wgpu = { git = "https://github.com/pythonesque/wgpu.git", rev = "b30d573c9aca0787f4ff7aba8bb0db7f823b7064" }
wgpu = { git = "https://github.com/pythonesque/wgpu.git", rev = "3858992e25d494fe7544ec3064221bbdafd5a547" }
# wgpu = { path = "../wgpu/wgpu" }
# wgpu-core = { path = "../wgpu/wgpu-core" }
# wgpu-types = { path = "../wgpu/wgpu-types" }
# # use the latest fixes in naga (remove when updates trickle down to wgpu-rs)
# naga = { git = "https://github.com/gfx-rs/naga.git", rev = "3a0f0144112ff621dd7f731bf455adf6cab19164" }

View File

@@ -3,7 +3,7 @@
(
caverns: false, // TODO: Disabled by default until cave overhaul
caves: true,
caves: false,
rocks: true,
shrubs: true,
trees: true,

View File

@@ -56,7 +56,9 @@ voxygen-egui = {package = "veloren-voxygen-egui", path = "egui", optional = true
# Graphics
winit = {version = "0.25.0", features = ["serde"]}
wgpu = { version = "=0.8.0", features = ["trace", "cross"] }
wgpu-profiler = { git = "https://github.com/Imberflur/wgpu-profiler", tag = "wgpu-0.8" }
# wgpu-profiler = { git = "https://github.com/Imberflur/wgpu-profiler", tag = "wgpu-0.8" }
# wgpu-profiler = { path = "../../wgpu-profiler" }
wgpu-profiler = { git = "https://github.com/pythonesque/wgpu-profiler", rev = "b6b16da077a1aca2fe3a2ceaa3bdcc1bdeb11d74" }
bytemuck = { version="1.4", features=["derive"] }
shaderc = "0.8.0"
# cmake crate used by shaderc, newer version 0.1.46 uses a new cmake command that is not available in our CI cmake version
@@ -75,7 +77,9 @@ keyboard-keynames = { git = "https://gitlab.com/Frinksy/keyboard-keynames.git",
# EGUI
egui = {version = "0.12", optional = true }
egui_wgpu_backend = {git = "https://github.com/hasenbanck/egui_wgpu_backend.git", rev = "63a002c6a9b6c016e45806dd065864431caab621", optional = true }
# egui_wgpu_backend = {git = "https://github.com/hasenbanck/egui_wgpu_backend.git", rev = "63a002c6a9b6c016e45806dd065864431caab621", optional = true }
# egui_wgpu_backend = { path = "../../egui_wgpu_backend", optional = true }
egui_wgpu_backend = {git = "https://github.com/pythonesque/egui_wgpu_backend.git", rev = "7a811c5f8915a1486f477ed7d8907d22579c6b17", optional = true }
egui_winit_platform = {version = "0.8", optional = true }
# ECS

View File

@@ -1004,7 +1004,9 @@ pub fn generate_mesh<'a/*, V: RectRasterableVol<Vox = Block> + ReadVol + Debug +
// Allocate the fresh mesh.
let mut col_lights = create_texture(col_lights_alloc_size);
let col_lights_size = col_lights.as_mut().map(|col_lights| {
finalize(bytemuck::cast_slice_mut(&mut col_lights.get_mapped_mut(0, col_lights.len())))
let slice = col_lights.get_mapped_mut(0, col_lights.len());
let mut buf = slice.get_mapped_range_mut();
finalize(bytemuck::cast_slice_mut(&mut buf))
}).unwrap_or(Vec2::broadcast(0));
(
opaque_mesh,

View File

@@ -49,14 +49,14 @@ impl<T: Copy + Pod> Buffer<T> {
///
/// NOTE: Will panic if the buffer was not explicitly mapped before this (without being
/// unmapped), either directly or via [Buffer::new_mapped].
pub fn get_mapped_mut(&self, offset: usize, len: usize) -> wgpu::BufferViewMut<'_> {
pub fn get_mapped_mut(&mut self, offset: usize, len: usize) -> wgpu::BufferSliceMut<'_> {
/* if !vals.is_empty() {
let contents = bytemuck::cast_slice(vals); */
let size_ty = std::mem::size_of::<T>() as u64;
let offset = offset as u64 * size_ty;
let size = /*vals.len()*/len as u64 * size_ty;
/* bytemuck::cast_slice_mut(&mut */self.buf.slice(offset..offset + size).get_mapped_range_mut()/* ) */
/* bytemuck::cast_slice_mut(&mut */self.buf.slice_mut(offset..offset + size)/*.get_mapped_range_mut()*//* ) */
/* .copy_from_slice(contents);
} */
}
@@ -70,7 +70,7 @@ impl<T: Copy + Pod> Buffer<T> {
/// NOTE: Queue is not *explicitly* used here, but it is implicitly used during the unmap
/// (within wgpu internals) when mapped at creation, and requires acquiring a lock on it,
/// so it's left in the API to deter people from using it when the queue isn't available.
pub fn unmap(&self, _queue: &wgpu::Queue/* , vals: &[T], offset: usize */) {
pub fn unmap(&mut self/*, _queue: &wgpu::Queue/* , vals: &[T], offset: usize */*/) {
/* if !vals.is_empty() {
let contents = bytemuck::cast_slice(vals);
@@ -126,3 +126,7 @@ impl<T: Copy + Pod> std::ops::Deref for DynamicBuffer<T> {
fn deref(&self) -> &Self::Target { &self.0 }
}
impl<T: Copy + Pod> std::ops::DerefMut for DynamicBuffer<T> {
fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 }
}

View File

@@ -39,14 +39,14 @@ impl<T: Copy + Pod> Consts<T> {
/// Get the GPU-side mapped slice represented by this constant handle, if it was previously
/// memory mapped.
pub fn get_mapped_mut(&self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferViewMut<'_> {
pub fn get_mapped_mut(&mut self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferSliceMut<'_> {
self.buf.get_mapped_mut(offset, len)
}
/// Unmaps the GPU-side handle represented by this constant handle, if it was previously
/// memory-mapped.
pub fn unmap(&self, queue: &wgpu::Queue) {
self.buf.unmap(queue);
pub fn unmap(&mut self, queue: &wgpu::Queue) {
self.buf.unmap(/*queue*/);
}
pub fn buf(&self) -> &wgpu::Buffer { &self.buf.buf }

View File

@@ -21,14 +21,14 @@ impl<T: Copy + Pod> Instances<T> {
/// Get the GPU-side mapped slice represented by this instances buffer, if it was previously
/// memory mapped.
pub fn get_mapped_mut(&self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferViewMut<'_> {
pub fn get_mapped_mut(&mut self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferSliceMut<'_> {
self.buf.get_mapped_mut(offset, len)
}
/// Unmaps the GPU-side handle represented by this instances buffer, if it was previously
/// memory-mapped.
pub fn unmap(&self, queue: &wgpu::Queue) {
self.buf.unmap(queue);
pub fn unmap(&mut self/*, queue: &wgpu::Queue*/) {
self.buf.unmap(/*queue*/);
}
// TODO: count vs len naming scheme??

View File

@@ -55,14 +55,14 @@ impl<V: Vertex> Model<V> {
/// Get the GPU-side mapped slice represented by this model handle, if it was previously
/// memory mapped.
pub fn get_mapped_mut(&self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferViewMut<'_> {
pub fn get_mapped_mut(&mut self, offset: usize, len: usize) -> /* &mut [T] */wgpu::BufferSliceMut<'_> {
self.vbuf.get_mapped_mut(offset, len)
}
/// Unmaps the GPU-side handle represented by this model handle, if it was previously
/// memory-mapped.
pub fn unmap(&self, queue: &wgpu::Queue) {
self.vbuf.unmap(queue);
pub fn unmap(&mut self/*, queue: &wgpu::Queue*/) {
self.vbuf.unmap(/*queue*/);
}
/// Create a model with a slice of a portion of this model to send to the

View File

@@ -512,8 +512,8 @@ impl Renderer {
// the previous frame.
let (maintain_tx, maintain_rx) = channel::bounded(0);
let device_ = Arc::clone(&device);
/* std::thread::spawn(move || {
/* let device_ = Arc::clone(&device);
std::thread::spawn(move || {
// Maintain each time we are requested to do so, until the renderer dies.
// Additionally, accepts CPU->GPU tasks containing updates to perform that need to lock
// the device (but not necessarily the queue?). This is a hopefully temporary measure
@@ -806,6 +806,7 @@ impl Renderer {
pub fn maintain(&self) {
if self.is_minimized {
self.queue.submit(std::iter::empty());
// self.device.poll(wgpu::Maintain::Poll);
}
// If the send fails, we can (generally) assume it's because the channel is out of
@@ -1286,7 +1287,7 @@ impl Renderer {
}
/// Unmaps a set of memory mapped consts.
pub fn unmap_consts<T: Copy + bytemuck::Pod>(&self, consts: &Consts<T>) {
pub fn unmap_consts<T: Copy + bytemuck::Pod>(&self, consts: &mut Consts<T>) {
consts.unmap(&self.queue)
}
@@ -1317,8 +1318,8 @@ impl Renderer {
}
/// Unmaps a set of memory mapped instances.
pub fn unmap_instances<T: Copy + bytemuck::Pod>(&self, instances: &Instances<T>) {
instances.unmap(&self.queue)
pub fn unmap_instances<T: Copy + bytemuck::Pod>(&self, instances: &mut Instances<T>) {
instances.unmap(/*&self.queue*/)
}
/// Update the expected index length to be large enough for a quad vertex buffer with this many
@@ -1412,16 +1413,17 @@ impl Renderer {
let create_model = self.create_model_lazy_base(usage);
move |mesh| {
let len = mesh.vertices().len();
let model = create_model(len)?;
model.get_mapped_mut(0, len)
let mut model = create_model(len)?;
let slice = model.get_mapped_mut(0, len);
slice.get_mapped_range_mut()
.copy_from_slice(bytemuck::cast_slice(mesh.vertices()));
Some(model)
}
}
/// Unmaps a memory mapped model.
pub fn unmap_model<V: Vertex>(&self, model: &Model<V>) {
model.unmap(&self.queue);
pub fn unmap_model<V: Vertex>(&self, model: &mut Model<V>) {
model.unmap(/*&self.queue*/);
}
/// Create a new dynamic model with the specified size.

View File

@@ -131,7 +131,7 @@ impl TakeScreenshot {
}
}
fn download_and_handle_internal(self) {
fn download_and_handle_internal(mut self) {
// Calculate padded bytes per row
let padded_bytes_per_row = padded_bytes_per_row(self.width, self.bytes_per_pixel);
let singlethread_rt = match tokio::runtime::Builder::new_current_thread().build() {
@@ -143,7 +143,7 @@ impl TakeScreenshot {
};
// Map buffer
let buffer_slice = self.buffer.slice(..);
let mut buffer_slice = self.buffer.slice_mut(..);
let buffer_map_future = buffer_slice.map_async(wgpu::MapMode::Read);
// Wait on buffer mapping

View File

@@ -389,10 +389,11 @@ fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug +
start = range.end;
range
});
let sprite_instances = create_instances(instance_ranges.iter().map(|range| range.len()).sum());
let mut sprite_instances = create_instances(instance_ranges.iter().map(|range| range.len()).sum());
if start > 0 {
sprite_instances
.get_mapped_mut(0, sprite_instances.count())
let slice = sprite_instances
.get_mapped_mut(0, sprite_instances.count());
slice.get_mapped_range_mut()
.array_chunks_mut::<{ core::mem::size_of::<SpriteInstance>() }>()
.zip(instances.into_iter().flatten()).for_each(|(dst, src)| {
// FIXME: cast doesn't work because bytemuck::cast isn't const generic-ified
@@ -400,10 +401,16 @@ fn mesh_worker/*<V: BaseVol<Vox = Block> + RectRasterableVol + ReadVol + Debug +
// *dst = bytemuck::cast(src);
dst.copy_from_slice(bytemuck::cast_slice(&[src]));
});
/* sprite_instances.unmap(); */
}
(instance_ranges, sprite_instances)
},
mesh,
mesh/* : mesh.map(|mut mesh| {
mesh.opaque_model.as_mut().map(|model| model.unmap());
mesh.fluid_model.as_mut().map(|model| model.unmap());
mesh.col_lights_info.0.as_mut().map(|model| model.unmap());
mesh
})*/,
blocks_of_interest,
started_tick,
}
@@ -1307,7 +1314,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
let create_fluid = renderer.create_model_lazy(wgpu::BufferUsage::VERTEX);
let create_instances = renderer.create_instances_lazy();
/* let create_locals = renderer.create_terrain_bound_locals(); */
let create_texture = renderer./*create_texture_raw*/create_model_lazy_base(wgpu::BufferUsage::COPY_SRC);
let create_texture = renderer./*create_texture_raw*/create_model_lazy_base(wgpu::BufferUsage::COPY_SRC/* | wgpu::BufferUsage::MAP_WRITE */);
/* cnt.fetch_add(1, Ordering::Relaxed); */
let job = move || {
// Since this loads when the task actually *runs*, rather than when it's
@@ -1368,15 +1375,15 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
if max_recv_count > 0 {
// Construct a buffer for all the chunks we're going to process in this frame. There might
// be some unused slots, which is fine.
let locals = /*Arc::new(*/renderer.create_consts_mapped(wgpu::BufferUsage::empty(), max_recv_count as usize)/*)*/;
let mut locals_buffer = locals.get_mapped_mut(0, locals.len());
let mut locals = /*Arc::new(*/renderer.create_consts_mapped(wgpu::BufferUsage::empty(), max_recv_count as usize)/*)*/;
let mut locals_bound = renderer.create_terrain_bound_locals(&locals/*, locals_offset */);
let mut locals_buffer = locals.get_mapped_mut(0, locals.len());
let mut encoder = renderer.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Update textures."),
});
for (locals_offset, (response, locals_buffer)) in incoming_chunks.zip(locals_buffer.array_chunks_mut::<{ core::mem::size_of::<TerrainLocals>() }>()).enumerate() {
for (locals_offset, (mut response, locals_buffer)) in incoming_chunks.zip(locals_buffer.get_mapped_range_mut().array_chunks_mut::<{ core::mem::size_of::<TerrainLocals>() }>()).enumerate() {
let pos = response.pos;
let response_started_tick = response.started_tick;
match self.mesh_todo.get(&pos) {
@@ -1401,7 +1408,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
.unwrap_or(current_time as f32);
// TODO: Allocate new atlas on allocation failure.
let (tex, tex_size) = mesh.col_lights_info;
let tex = tex.expect("The mesh exists, so the texture should too.");
let mut tex = tex.expect("The mesh exists, so the texture should too.");
let atlas = &mut self.atlas;
let chunks = &mut self.chunks;
let col_lights = &mut self.col_lights;
@@ -1449,10 +1456,10 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
// lock as of wgpu 0.8.1).
//
// FIXME: When we upgrade wgpu, reconsider all this.
renderer.unmap_instances(&response.sprite_instances.1);
mesh.opaque_model.as_ref().map(|model| renderer.unmap_model(model));
mesh.fluid_model.as_ref().map(|model| renderer.unmap_model(model));
renderer.unmap_model(&tex);
renderer.unmap_instances(&mut response.sprite_instances.1);
mesh.opaque_model.as_mut().map(|model| renderer.unmap_model(model));
mesh.fluid_model.as_mut().map(|model| renderer.unmap_model(model));
renderer.unmap_model(&mut tex);
// NOTE: Cast is safe since the origin was a u16.
let atlas_offs = Vec2::new(
@@ -1556,7 +1563,7 @@ impl/*<V: RectRasterableVol>*/ Terrain<V> {
}
// Drop the memory mapping and unmap the locals.
drop(locals_buffer);
renderer.unmap_consts(&locals);
renderer.unmap_consts(&mut locals);
// Drop buffer on background thread.
slowjob.spawn(&"TERRAIN_DROP", move || { drop(locals); });
/* // TODO: Delay submission, don't just submit immediately out of convenience!