mirror of
https://gitlab.com/veloren/veloren.git
synced 2024-08-30 18:12:32 +00:00
Progress on GPU premultiplication.
* General progress in setting up code paths to support GPU premultiplication. * Created `PremultiplyUpload` type to represent an initiated image upload where the premultiply pass needs to be run to complete it. * Converted from compute pass to render pass since current limitations make it difficult to write directly to an srgb image from a compute shader. * Replace `CachedDetails::Immutable` with keeping track of the parameters used to create the texture (i.e. the border color). * Create `TextureRequirements`, `TextureParameters`, and `CacheKey` types to encode parameters that go into texture creation and image caching and to determine when the space in texture memory should be reused when replacing a graphic. * Add custom texture creation logic for the UI textures since those need certain usage combinations.
This commit is contained in:
parent
1d51aae3b2
commit
efd932c71e
@ -43,6 +43,16 @@ vec3 linear_to_srgb(vec3 col) {
|
||||
);
|
||||
}
|
||||
|
||||
// Unpacks a color stored as four 8-bit channels in a single uint (red in the
// most significant byte, alpha in the least significant byte), normalizes each
// channel to [0.0, 1.0] and converts the color channels from sRGB to linear.
// Alpha is passed through unchanged.
vec4 srgba8_to_linear(uint srgba8) {
    // NOTE: this must be a float vector; storing the normalized value in a
    // `uvec4` is a type error (and would truncate every channel to 0 or 1).
    vec4 nonlinear = vec4(uvec4(
        (srgba8 >> 24) & 0xFFu,
        (srgba8 >> 16) & 0xFFu,
        (srgba8 >> 8) & 0xFFu,
        srgba8 & 0xFFu
    )) / 255.0;
    return vec4(srgb_to_linear(nonlinear.rgb), nonlinear.a);
}
|
||||
|
||||
float pow5(float x) {
|
||||
float x2 = x * x;
|
||||
return x2 * x2 * x;
|
||||
|
@ -1,36 +0,0 @@
|
||||
#version 420 core
|
||||
|
||||
// TODO: should we modify this based on the current device?
|
||||
// TODO: would it be better to have 2D workgroup for writing to a local area in the target image?
|
||||
layout(local_size_x = 256) in;
|
||||
|
||||
// TODO: writing all images into a single buffer?
|
||||
layout(set = 0, binding = 0) readonly buffer InputImage {
|
||||
uint input_pixels[];
|
||||
};
|
||||
|
||||
layout (std140, set = 0, binding = 1)
|
||||
uniform u_locals {
|
||||
// Size of the input image.
|
||||
uvec2 image_size;
|
||||
// Offset to place the transformed input image at in the target
|
||||
// image.
|
||||
uvec2 target_offset;
|
||||
};
|
||||
|
||||
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D target_image;
|
||||
|
||||
void main() {
|
||||
uint global_id = gl_GlobalInvocationId.x;
|
||||
uvec2 src_pixel_pos = uvec2(global_id % image_size.x, global_id / image_size.x);
|
||||
// Otherwise this is is an out of bounds compute instance.
|
||||
if (src_pixel_pos < image_size.y) {
|
||||
uint pixel = input_pixels[global_id];
|
||||
vec4 nonlinear = vec4((pixel >> 16) & 0xFFu, (pixel >> 8) & 0xFFu, (pixel >> 8) & 0xFFu, pixel & 0xFFu);
|
||||
vec4 linear;
|
||||
vec4 premultiplied_linear;
|
||||
vec4 premultiplied_nonlinear;
|
||||
// No free srgb with image store operations https://www.khronos.org/opengl/wiki/Image_Load_Store#Format_compatibility
|
||||
imageStore(target_image, src_pixel_pos + target_offset, premultiplied_nonlinear);
|
||||
}
|
||||
}
|
16
assets/voxygen/shaders/premultiply-alpha-frag.glsl
Normal file
16
assets/voxygen/shaders/premultiply-alpha-frag.glsl
Normal file
@ -0,0 +1,16 @@
|
||||
#version 420 core
// `texelFetch` is used below on a separate `texture2D` (no sampler bound), which
// requires this extension.
#extension GL_EXT_samplerless_texture_functions : enable

// Image that was uploaded without premultiplied alpha.
layout(set = 0, binding = 0)
uniform texture2D source_texture;

// Position in the source image, in texels (converted with `ivec2` below).
layout(location = 0) in vec2 source_coords;

layout(location = 0) out vec4 target_color;

void main() {
    // We get free nonlinear -> linear conversion when sampling from srgb texture;
    vec4 linear = texelFetch(source_texture, ivec2(source_coords), 0);
    // Multiply the color channels by alpha while in linear space.
    vec4 premultiplied_linear = vec4(linear.rgb * linear.a, linear.a);
    // We get free linear -> nonlinear conversion rendering to srgb texture.
    target_color = premultiplied_linear;
}
|
48
assets/voxygen/shaders/premultiply-alpha-vert.glsl
Normal file
48
assets/voxygen/shaders/premultiply-alpha-vert.glsl
Normal file
@ -0,0 +1,48 @@
|
||||
#version 420 core
|
||||
|
||||
layout(push_constant) uniform Params {
|
||||
// Size of the source image.
|
||||
uint source_size_xy;
|
||||
// Offset to place the image at in the target texture.
|
||||
//
|
||||
// Origin is the top-left.
|
||||
uint target_offset_xy;
|
||||
// Size of the target texture.
|
||||
uint target_size_xy;
|
||||
};
|
||||
|
||||
layout(location = 0) out vec2 source_coords;
|
||||
|
||||
// Splits a packed 32-bit value into two 16-bit components: x from the low 16
// bits and y from the high 16 bits.
uvec2 unpack(uint xy) {
    // NOTE: GLSL does not allow a trailing comma in an argument list.
    return uvec2(
        bitfieldExtract(xy, 0, 16),
        bitfieldExtract(xy, 16, 16)
    );
}
|
||||
|
||||
// Emits one corner of a rectangle (two triangles, 6 vertices selected by
// `gl_VertexIndex`) covering the area of the target texture that the source
// image should be written to, and passes the matching source image position
// to the fragment shader.
void main() {
    vec2 source_size = vec2(unpack(source_size_xy));
    vec2 target_offset = vec2(unpack(target_offset_xy));
    vec2 target_size = vec2(unpack(target_size_xy));

    // Generate rectangle (counter clockwise triangles)
    //
    // 0 0 1 1 1 0
    float x_select = float(((uint(gl_VertexIndex) + 1u) / 3u) % 2u);
    // 1 0 0 0 1 1
    float y_select = float(((uint(gl_VertexIndex) + 5u) / 3u) % 2u);

    // Corner of the rectangle in normalized [0.0, 1.0] source image space.
    vec2 source_coords_normalized = vec2(
        // left -> right (on screen)
        mix(0.0, 1.0, x_select),
        // bottom -> top (on screen)
        mix(1.0, 0.0, y_select)
    );

    // The fragment shader converts this to `ivec2` and uses it with
    // `texelFetch`, so it has to be expressed in texels rather than in
    // normalized coordinates.
    source_coords = source_coords_normalized * source_size;

    vec2 target_coords_normalized = (target_offset + source_coords) / target_size;

    // Flip y and transform [0.0, 1.0] -> [-1.0, 1.0] to get NDC coordinates.
    vec2 v_pos = ((target_coords_normalized * 2.0) - vec2(1.0)) * vec2(1.0, -1.0);

    gl_Position = vec4(v_pos, 0.0, 1.0);
}
|
@ -43,7 +43,8 @@ pub use self::{
|
||||
create_quad as create_ui_quad,
|
||||
create_quad_vert_gradient as create_ui_quad_vert_gradient, create_tri as create_ui_tri,
|
||||
BoundLocals as UiBoundLocals, Locals as UiLocals, Mode as UiMode,
|
||||
TextureBindGroup as UiTextureBindGroup, Vertex as UiVertex,
|
||||
PremultiplyUpload as UiPremultiplyUpload, TextureBindGroup as UiTextureBindGroup,
|
||||
Vertex as UiVertex,
|
||||
},
|
||||
GlobalModel, Globals, GlobalsBindGroup, GlobalsLayouts, Light, Shadow,
|
||||
},
|
||||
|
@ -1,8 +1,21 @@
|
||||
use super::super::{Bound, Consts, GlobalsLayouts, Quad, Texture, Tri, Vertex as VertexTrait};
|
||||
use bytemuck::{Pod, Zeroable};
|
||||
use core::num::NonZeroU32;
|
||||
use std::mem;
|
||||
use vek::*;
|
||||
|
||||
// TODO: profile UI rendering before and after on laptop.
|
||||
|
||||
/// The format of textures that the UI sources image data from.
|
||||
///
|
||||
/// Note, this is not directly used in all relevant locations, but still helps to
|
||||
/// more clearly document that this is the format being used. Notably,
|
||||
/// textures are created via `renderer.create_dynamic_texture(...)` and
|
||||
/// `renderer.create_texture(&DynamicImage::ImageRgba(image), ...)` (TODO:
|
||||
/// update if we have to refactor when implementing the RENDER_ATTACHMENT
|
||||
/// usage).
|
||||
const UI_IMAGE_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8UnormSrgb;
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Copy, Clone, Debug, Zeroable, Pod)]
|
||||
pub struct Vertex {
|
||||
@ -132,8 +145,8 @@ pub struct TextureBindGroup {
|
||||
}
|
||||
|
||||
pub struct UiLayout {
|
||||
pub locals: wgpu::BindGroupLayout,
|
||||
pub texture: wgpu::BindGroupLayout,
|
||||
locals: wgpu::BindGroupLayout,
|
||||
texture: wgpu::BindGroupLayout,
|
||||
}
|
||||
|
||||
impl UiLayout {
|
||||
@ -395,20 +408,77 @@ pub fn create_tri(
|
||||
)
|
||||
}
|
||||
|
||||
// Steps:
|
||||
// 1. Upload new image via `Device::create_buffer_init`, with `MAP_WRITE` flag
|
||||
// to avoid staging buffer.
|
||||
// 2. Run compute pipeline to multiply by alpha reading from this buffer and
|
||||
// writing to the final texture (this may be in an atlas or an independent
|
||||
// texture if the image is over a certain size threshold).
|
||||
// Premultiplying alpha on the GPU before placing images into the textures that
|
||||
// will be sampled from in the UI pipeline.
|
||||
//
|
||||
// Info needed in compute shader:
|
||||
// * source buffer
|
||||
// * target texture
|
||||
// * image dimensions
|
||||
// * position in the target texture
|
||||
// (what is the overhead of compute call? at some point we may be better off
|
||||
// converting small images on the cpu)
|
||||
// Steps:
|
||||
//
|
||||
// 1. Upload new image via `Device::create_texture_with_data`.
|
||||
//
|
||||
// (NOTE: Initially considered: Creating a storage buffer to read from in the
|
||||
// shader via `Device::create_buffer_init`, with `MAP_WRITE` flag to avoid
|
||||
// staging buffer. However, with dedicated GPUs combining usages other than
|
||||
// `COPY_SRC` with `MAP_WRITE` may be less ideal. Plus, by copying into a
|
||||
// texture first we can get free srgb conversion when fetching colors
|
||||
// from the texture. In the future, we may want to branch based on
|
||||
// whether the GPU is integrated and avoid this extra copy.)
|
||||
//
|
||||
// 2. Run render pipeline to multiply by alpha reading from this texture and
|
||||
// writing to the final texture (this can either be in an atlas or in an
|
||||
// independent texture if the image is over a certain size threshold).
|
||||
//
|
||||
// (NOTE: Initially considered: using a compute pipeline and writing to the
|
||||
// final texture as a storage texture. However, the srgb format can't be used
|
||||
// with storage texture and there is not yet the capability to create
|
||||
// non-srgb views of srgb textures.)
|
||||
//
|
||||
// Info needed:
|
||||
//
|
||||
// * source texture (texture binding)
|
||||
// * target texture (render attachment)
|
||||
// * source image dimensions (push constant)
|
||||
// * target texture dimensions (push constant)
|
||||
// * position in the target texture (push constant)
|
||||
//
|
||||
// TODO: potential optimizations
|
||||
// * what is the overhead of this draw call? at some point we may be better
|
||||
// off converting very small images on the cpu and/or batching these into a
|
||||
// single draw call
|
||||
// * what is the overhead of creating new small textures? for processing many
|
||||
// small images would it be useful to create a single texture the same size as
|
||||
// our cache texture and use Queue::write_texture?
|
||||
// * is using create_buffer_init and reading directly from that (with manual
|
||||
// srgb conversion) worth avoiding staging buffer/copy-to-texture for
|
||||
// integrated GPUs?
|
||||
// * premultiplying alpha in a release asset preparation step
|
||||
|
||||
pub struct PremultiplyAlphaLayout {
|
||||
source_texture: wgpu::BindGroupLayout,
|
||||
}
|
||||
|
||||
impl PremultiplyAlphaLayout {
|
||||
pub fn new(device: &wgpu::Device) -> Self {
|
||||
Self {
|
||||
source_texture: device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
|
||||
label: None,
|
||||
entries: &[
|
||||
// source_texture
|
||||
wgpu::BindGroupLayoutEntry {
|
||||
binding: 0,
|
||||
visibility: wgpu::ShaderStage::FRAGMENT,
|
||||
ty: wgpu::BindingType::Texture {
|
||||
sample_type: wgpu::TextureSampleType::Float { filterable: false },
|
||||
view_dimension: wgpu::TextureViewDimension::D2,
|
||||
multisampled: false,
|
||||
},
|
||||
count: None,
|
||||
},
|
||||
],
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PremultiplyAlphaPipeline {
|
||||
pub pipeline: wgpu::RenderPipeline,
|
||||
}
|
||||
@ -416,22 +486,163 @@ pub struct PremultiplyAlphaPipeline {
|
||||
impl PremultiplyAlphaPipeline {
|
||||
pub fn new(
|
||||
device: &wgpu::Device,
|
||||
module: &wgpu::ShaderModule,
|
||||
layout: &PremultiplAlphaLayout,
|
||||
vs_module: &wgpu::ShaderModule,
|
||||
fs_module: &wgpu::ShaderModule,
|
||||
layout: &PremultiplyAlphaLayout,
|
||||
) -> Self {
|
||||
let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
|
||||
label: Some("Premultiply alpha pipeline layout"),
|
||||
push_constant_ranges: &[],
|
||||
bind_group_layouts: &[layout],
|
||||
bind_group_layouts: &[&layout.source_texture],
|
||||
push_constant_ranges: &[wgpu::PushConstantRange {
|
||||
stages: wgpu::ShaderStage::VERTEX,
|
||||
range: 0..core::mem::size_of::<PremultiplyAlphaParams>() as u32,
|
||||
}],
|
||||
});
|
||||
|
||||
let pipeline = device.create_compute_pipeline(&wgpu::RenderPipelineDescriptor {
|
||||
let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
|
||||
label: Some("Premultiply alpha pipeline"),
|
||||
layout: Some(&pipeline_layout),
|
||||
module,
|
||||
entry_point: "main",
|
||||
vertex: wgpu::VertexState {
|
||||
module: vs_module,
|
||||
entry_point: "main",
|
||||
buffers: &[],
|
||||
},
|
||||
primitive: wgpu::PrimitiveState {
|
||||
topology: wgpu::PrimitiveTopology::TriangleList,
|
||||
strip_index_format: None,
|
||||
front_face: wgpu::FrontFace::Ccw,
|
||||
cull_mode: Some(wgpu::Face::Back),
|
||||
clamp_depth: false,
|
||||
polygon_mode: wgpu::PolygonMode::Fill,
|
||||
conservative: false,
|
||||
},
|
||||
depth_stencil: None,
|
||||
multisample: wgpu::MultisampleState::default(),
|
||||
fragment: Some(wgpu::FragmentState {
|
||||
module: fs_module,
|
||||
entry_point: "main",
|
||||
targets: &[wgpu::ColorTargetState {
|
||||
format: UI_IMAGE_FORMAT,
|
||||
blend: None,
|
||||
write_mask: wgpu::ColorWrite::ALL,
|
||||
}],
|
||||
}),
|
||||
});
|
||||
|
||||
Self { pipeline }
|
||||
}
|
||||
}
|
||||
|
||||
/// Uploaded as push constant.
|
||||
#[repr(C)]
|
||||
#[derive(Copy, Clone, Debug, Zeroable, Pod)]
|
||||
pub struct PremultiplyAlphaParams {
|
||||
/// Size of the source image.
|
||||
source_size_xy: u32,
|
||||
/// Offset to place the image at in the target texture.
|
||||
///
|
||||
/// Origin is the top-left.
|
||||
target_offset_xy: u32,
|
||||
/// Size of the target texture.
|
||||
target_size_xy: u32,
|
||||
}
|
||||
|
||||
/// An image upload that needs alpha premultiplication and which is in a pending
|
||||
/// state.
|
||||
///
|
||||
/// From here we will use the `PremultiplyAlpha` pipeline to premultiply the
|
||||
/// alpha while transferring the image to its destination texture.
|
||||
pub struct PremultiplyUpload {
|
||||
source_bg: wgpu::BindGroup,
|
||||
source_size_xy: u32,
|
||||
/// The location in the final texture this will be placed at. Technically,
|
||||
/// we don't need this information at this point but it is convenient to
|
||||
/// store it here.
|
||||
offset: Vec2<u16>,
|
||||
}
|
||||
|
||||
impl PremultiplyUpload {
|
||||
pub fn prepare(
|
||||
device: &wgpu::Device,
|
||||
queue: &wgpu::Queue,
|
||||
layout: &PremultiplyAlphaLayout,
|
||||
image: &image::RgbaImage,
|
||||
offset: Vec2<u16>,
|
||||
) -> Self {
|
||||
// TODO: duplicating some code from `Texture` since:
|
||||
// 1. We don't need to create a sampler.
|
||||
// 2. Texture::new accepts &DynamicImage which isn't possible to create from
|
||||
// &RgbaImage without cloning.
|
||||
let image_size = wgpu::Extent3d {
|
||||
width: image.width(),
|
||||
height: image.height(),
|
||||
depth_or_array_layers: 1,
|
||||
};
|
||||
let source_tex = device.create_texture(&wgpu::TextureDescriptor {
|
||||
label: None,
|
||||
size: image_size,
|
||||
mip_level_count: 1,
|
||||
sample_count: 1,
|
||||
dimension: wgpu::TextureDimension::D2,
|
||||
format: wgpu::TextureFormat::Rgba8UnormSrgb,
|
||||
usage: wgpu::TextureUsage::SAMPLED | wgpu::TextureUsage::COPY_DST,
|
||||
});
|
||||
queue.write_texture(
|
||||
wgpu::ImageCopyTexture {
|
||||
texture: &source_tex,
|
||||
mip_level: 0,
|
||||
origin: wgpu::Origin3d::ZERO,
|
||||
},
|
||||
&(&**image)[..(image.width() as usize * image.height() as usize)],
|
||||
wgpu::ImageDataLayout {
|
||||
offset: 0,
|
||||
bytes_per_row: NonZeroU32::new(image.width() * 4),
|
||||
rows_per_image: NonZeroU32::new(image.height()),
|
||||
},
|
||||
image_size,
|
||||
);
|
||||
// Create view to use to create bind group
|
||||
let view = source_tex.create_view(&wgpu::TextureViewDescriptor {
|
||||
label: None,
|
||||
format: Some(wgpu::TextureFormat::Rgba8UnormSrgb),
|
||||
dimension: Some(wgpu::TextureViewDimension::D2),
|
||||
aspect: wgpu::TextureAspect::All,
|
||||
base_mip_level: 0,
|
||||
mip_level_count: None,
|
||||
base_array_layer: 0,
|
||||
array_layer_count: None,
|
||||
});
|
||||
let source_bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||
label: None,
|
||||
layout: &layout.source_texture,
|
||||
entries: &[wgpu::BindGroupEntry {
|
||||
binding: 0,
|
||||
resource: wgpu::BindingResource::TextureView(&view),
|
||||
}],
|
||||
});
|
||||
|
||||
// NOTE: We assume the max texture size is less than u16::MAX.
|
||||
let source_size_xy = image_size.width + image_size.height << 16;
|
||||
|
||||
Self {
|
||||
source_bg,
|
||||
source_size_xy,
|
||||
offset,
|
||||
}
|
||||
}
|
||||
|
||||
/// Semantically, this consumes the `PremultiplyUpload` but we need to keep
|
||||
/// the bind group alive to the end of the render pass and don't want to
|
||||
/// bother storing it somewhere else.
|
||||
pub fn draw_data(&self, target: &Texture) -> (&wgpu::BindGroup, PremultiplyAlphaParams) {
|
||||
let target_offset_xy = u32::from(self.offset.x) + u32::from(self.offset.y) << 16;
|
||||
let target_dims = target.get_dimensions();
|
||||
// NOTE: We assume the max texture size is less than u16::MAX.
|
||||
let target_size_xy = target_dims.x + target_dims.y << 16;
|
||||
(&self.source_bg, PremultiplyAlphaParams {
|
||||
source_size_xy: self.source_size_xy,
|
||||
target_offset_xy,
|
||||
target_size_xy,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -63,6 +63,7 @@ struct ImmutableLayouts {
|
||||
clouds: clouds::CloudsLayout,
|
||||
bloom: bloom::BloomLayout,
|
||||
ui: ui::UiLayout,
|
||||
premultiply_alpha: ui::PremultiplyAlphaLayout,
|
||||
blit: blit::BlitLayout,
|
||||
}
|
||||
|
||||
@ -393,6 +394,7 @@ impl Renderer {
|
||||
&pipeline_modes,
|
||||
));
|
||||
let ui = ui::UiLayout::new(&device);
|
||||
let premultiply_alpha = ui::PremultiplyAlphaLayout::new(&device);
|
||||
let blit = blit::BlitLayout::new(&device);
|
||||
|
||||
let immutable = Arc::new(ImmutableLayouts {
|
||||
@ -407,6 +409,7 @@ impl Renderer {
|
||||
clouds,
|
||||
bloom,
|
||||
ui,
|
||||
premultiply_alpha,
|
||||
blit,
|
||||
});
|
||||
|
||||
@ -1434,6 +1437,20 @@ impl Renderer {
|
||||
texture.update(&self.queue, offset, size, bytemuck::cast_slice(data))
|
||||
}
|
||||
|
||||
pub fn prepare_premultiply_upload(
|
||||
&self,
|
||||
image: &image::RgbaImage,
|
||||
offset: Vec2<u16>,
|
||||
) -> ui::PremultiplyUpload {
|
||||
ui::PremultiplyUpload::prepare(
|
||||
&self.device,
|
||||
&self.queue,
|
||||
&self.layouts.premultiply_alpha,
|
||||
image,
|
||||
offset,
|
||||
)
|
||||
}
|
||||
|
||||
/// Queue to obtain a screenshot on the next frame render
|
||||
pub fn create_screenshot(
|
||||
&mut self,
|
||||
|
@ -12,6 +12,7 @@ use super::{
|
||||
rain_occlusion_map::{RainOcclusionMap, RainOcclusionMapRenderer},
|
||||
Renderer, ShadowMap, ShadowMapRenderer,
|
||||
};
|
||||
use common_base::prof_span;
|
||||
use core::{num::NonZeroU32, ops::Range};
|
||||
use std::sync::Arc;
|
||||
use vek::Aabr;
|
||||
@ -424,6 +425,44 @@ impl<'frame> Drawer<'frame> {
|
||||
});
|
||||
}
|
||||
|
||||
pub fn run_ui_premultiply_passes<'a>(
|
||||
&mut self,
|
||||
targets: impl Iterator<Item = (&'a super::super::Texture, Vec<ui::PremultiplyUpload>)>,
|
||||
) {
|
||||
let encoder = self.encoder.as_mut().unwrap();
|
||||
let device = self.borrow.device;
|
||||
|
||||
// TODO: What is the CPU overhead of each renderpass?
|
||||
for (i, (target_texture, uploads)) in targets.enumerate() {
|
||||
prof_span!("ui premultiply pass");
|
||||
tracing::info!("{} uploads", uploads.len());
|
||||
let profile_name = format!("ui_premultiply_pass {}", i);
|
||||
let label = format!("ui premultiply pass {}", i);
|
||||
// TODO: a GPU profile scope on each of the passes here may be a bit too fine
|
||||
// grained.
|
||||
let mut render_pass =
|
||||
encoder.scoped_render_pass(&profile_name, device, &wgpu::RenderPassDescriptor {
|
||||
label: Some(&label),
|
||||
color_attachments: &[wgpu::RenderPassColorAttachment {
|
||||
view: &target_texture.view,
|
||||
resolve_target: None,
|
||||
ops: wgpu::Operations {
|
||||
load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
|
||||
store: true,
|
||||
},
|
||||
}],
|
||||
depth_stencil_attachment: None,
|
||||
});
|
||||
for upload in &uploads {
|
||||
let (source_bind_group, push_constant_data) = upload.draw_data(target_texture);
|
||||
let bytes = bytemuck::bytes_of(&push_constant_data);
|
||||
render_pass.set_bind_group(0, source_bind_group, &[]);
|
||||
render_pass.set_push_constants(wgpu::ShaderStage::VERTEX, 0, bytes);
|
||||
render_pass.draw_indexed(0..6, 0, 0..1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn third_pass(&mut self) -> ThirdPassDrawer {
|
||||
let encoder = self.encoder.as_mut().unwrap();
|
||||
let device = self.borrow.device;
|
||||
|
@ -33,6 +33,7 @@ pub struct Pipelines {
|
||||
pub lod_object: lod_object::LodObjectPipeline,
|
||||
pub terrain: terrain::TerrainPipeline,
|
||||
pub ui: ui::UiPipeline,
|
||||
pub premultiply_alpha: ui::PremultiplyAlphaPipeline,
|
||||
pub blit: blit::BlitPipeline,
|
||||
}
|
||||
|
||||
@ -79,6 +80,7 @@ pub struct IngameAndShadowPipelines {
|
||||
/// Use to decouple interface pipeline creation when initializing the renderer
|
||||
pub struct InterfacePipelines {
|
||||
pub ui: ui::UiPipeline,
|
||||
pub premultiply_alpha: ui::PremultiplyAlphaPipeline,
|
||||
pub blit: blit::BlitPipeline,
|
||||
}
|
||||
|
||||
@ -100,6 +102,7 @@ impl Pipelines {
|
||||
lod_object: ingame.lod_object,
|
||||
terrain: ingame.terrain,
|
||||
ui: interface.ui,
|
||||
premultiply_alpha: interface.premultiply_alpha,
|
||||
blit: interface.blit,
|
||||
}
|
||||
}
|
||||
@ -127,6 +130,8 @@ struct ShaderModules {
|
||||
trail_frag: wgpu::ShaderModule,
|
||||
ui_vert: wgpu::ShaderModule,
|
||||
ui_frag: wgpu::ShaderModule,
|
||||
premultiply_alpha_vert: wgpu::ShaderModule,
|
||||
premultiply_alpha_frag: wgpu::ShaderModule,
|
||||
lod_terrain_vert: wgpu::ShaderModule,
|
||||
lod_terrain_frag: wgpu::ShaderModule,
|
||||
clouds_vert: wgpu::ShaderModule,
|
||||
@ -336,6 +341,8 @@ impl ShaderModules {
|
||||
trail_frag: create_shader("trail-frag", ShaderKind::Fragment)?,
|
||||
ui_vert: create_shader("ui-vert", ShaderKind::Vertex)?,
|
||||
ui_frag: create_shader("ui-frag", ShaderKind::Fragment)?,
|
||||
premultiply_alpha_vert: create_shader("premultiply-alpha-vert", ShaderKind::Vertex)?,
|
||||
premultiply_alpha_frag: create_shader("premultiply-alpha-frag", ShaderKind::Fragment)?,
|
||||
lod_terrain_vert: create_shader("lod-terrain-vert", ShaderKind::Vertex)?,
|
||||
lod_terrain_frag: create_shader("lod-terrain-frag", ShaderKind::Fragment)?,
|
||||
clouds_vert: create_shader("clouds-vert", ShaderKind::Vertex)?,
|
||||
@ -416,11 +423,11 @@ struct PipelineNeeds<'a> {
|
||||
fn create_interface_pipelines(
|
||||
needs: PipelineNeeds,
|
||||
pool: &rayon::ThreadPool,
|
||||
tasks: [Task; 2],
|
||||
tasks: [Task; 3],
|
||||
) -> InterfacePipelines {
|
||||
prof_span!(_guard, "create_interface_pipelines");
|
||||
|
||||
let [ui_task, blit_task] = tasks;
|
||||
let [ui_task, premultiply_alpha_task, blit_task] = tasks;
|
||||
// Construct a pipeline for rendering UI elements
|
||||
let create_ui = || {
|
||||
ui_task.run(
|
||||
@ -438,6 +445,20 @@ fn create_interface_pipelines(
|
||||
)
|
||||
};
|
||||
|
||||
let create_premultiply_alpha = || {
|
||||
premultiply_alpha_task.run(
|
||||
|| {
|
||||
ui::PremultiplyAlphaPipeline::new(
|
||||
needs.device,
|
||||
&needs.shaders.premultiply_alpha_vert,
|
||||
&needs.shaders.premultiply_alpha_frag,
|
||||
&needs.layouts.premultiply_alpha,
|
||||
)
|
||||
},
|
||||
"premultiply alpha pipeline creation",
|
||||
)
|
||||
};
|
||||
|
||||
// Construct a pipeline for blitting, used during screenshotting
|
||||
let create_blit = || {
|
||||
blit_task.run(
|
||||
@ -454,9 +475,15 @@ fn create_interface_pipelines(
|
||||
)
|
||||
};
|
||||
|
||||
let (ui, blit) = pool.join(create_ui, create_blit);
|
||||
let (ui, (premultiply_alpha, blit)) = pool.join(create_ui, || {
|
||||
pool.join(create_premultiply_alpha, create_blit)
|
||||
});
|
||||
|
||||
InterfacePipelines { ui, blit }
|
||||
InterfacePipelines {
|
||||
ui,
|
||||
premultiply_alpha,
|
||||
blit,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create IngamePipelines and shadow pipelines in parallel
|
||||
|
@ -73,6 +73,8 @@ impl assets::Compound for Shaders {
|
||||
"trail-frag",
|
||||
"ui-vert",
|
||||
"ui-frag",
|
||||
"premultiply-alpha-vert",
|
||||
"premultiply_alpha-frag",
|
||||
"lod-terrain-vert",
|
||||
"lod-terrain-frag",
|
||||
"clouds-vert",
|
||||
|
@ -224,6 +224,7 @@ impl Texture {
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: remove `get` from this name
|
||||
/// Get dimensions of the represented image.
|
||||
pub fn get_dimensions(&self) -> vek::Vec3<u32> {
|
||||
vek::Vec3::new(
|
||||
|
@ -51,7 +51,9 @@ impl Cache {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn glyph_cache_tex(&self) -> &(Texture, UiTextureBindGroup) { &self.glyph_cache_tex }
|
||||
pub fn glyph_cache_tex(&self) -> (&Texture, &UiTextureBindGroup) {
|
||||
(&self.glyph_cache_tex.0, &self.glyph_cache_tex.1)
|
||||
}
|
||||
|
||||
pub fn cache_mut_and_tex(
|
||||
&mut self,
|
||||
|
@ -4,7 +4,7 @@ pub mod renderer;
|
||||
pub use renderer::{SampleStrat, Transform};
|
||||
|
||||
use crate::{
|
||||
render::{Renderer, Texture, UiTextureBindGroup},
|
||||
render::{Renderer, Texture, UiPremultiplyUpload, UiTextureBindGroup},
|
||||
ui::KeyedJobs,
|
||||
};
|
||||
use common::{figure::Segment, slowjob::SlowJobPool};
|
||||
@ -12,7 +12,7 @@ use guillotiere::{size2, SimpleAtlasAllocator};
|
||||
use hashbrown::{hash_map::Entry, HashMap};
|
||||
use image::{DynamicImage, RgbaImage};
|
||||
use slab::Slab;
|
||||
use std::{hash::Hash, sync::Arc};
|
||||
use std::{borrow::Cow, hash::Hash, sync::Arc};
|
||||
use tracing::{error, warn};
|
||||
use vek::*;
|
||||
|
||||
@ -29,6 +29,7 @@ pub enum Graphic {
|
||||
Image(Arc<DynamicImage>, Option<Rgba<f32>>),
|
||||
// Note: none of the users keep this Arc currently
|
||||
Voxel(Arc<Segment>, Transform, SampleStrat),
|
||||
// TODO: Re-evaluate whether we need this (especially outside conrod context)
|
||||
Blank,
|
||||
}
|
||||
|
||||
@ -63,11 +64,11 @@ pub struct TexId(usize);
|
||||
|
||||
enum CachedDetails {
|
||||
Atlas {
|
||||
// Index of the atlas this is cached in
|
||||
// Index of the atlas this is cached in.
|
||||
atlas_idx: usize,
|
||||
// Whether this texture is valid.
|
||||
valid: bool,
|
||||
// Where in the cache texture this is
|
||||
// Where in the cache texture this is.
|
||||
aabr: Aabr<u16>,
|
||||
},
|
||||
Texture {
|
||||
@ -76,10 +77,6 @@ enum CachedDetails {
|
||||
// Whether this texture is valid.
|
||||
valid: bool,
|
||||
},
|
||||
Immutable {
|
||||
// Index of the (unique, immutable, non-atlas) texture this is cached in.
|
||||
index: usize,
|
||||
},
|
||||
}
|
||||
|
||||
impl CachedDetails {
|
||||
@ -89,10 +86,8 @@ impl CachedDetails {
|
||||
fn info(
|
||||
&self,
|
||||
atlases: &[(SimpleAtlasAllocator, usize)],
|
||||
textures: &Slab<(Texture, UiTextureBindGroup)>,
|
||||
textures: &Slab<(Texture, UiTextureBindGroup, Vec<UiPremultiplyUpload>)>,
|
||||
) -> (usize, bool, Aabr<u16>) {
|
||||
// NOTE: We don't accept images larger than u16::MAX (rejected in `cache_res`)
|
||||
// (and probably would not be able to create a texture this large).
|
||||
match *self {
|
||||
CachedDetails::Atlas {
|
||||
atlas_idx,
|
||||
@ -102,38 +97,136 @@ impl CachedDetails {
|
||||
CachedDetails::Texture { index, valid } => {
|
||||
(index, valid, Aabr {
|
||||
min: Vec2::zero(),
|
||||
// Note texture should always match the cached dimensions
|
||||
max: textures[index].0.get_dimensions().xy().map(|e| e as u16),
|
||||
})
|
||||
},
|
||||
CachedDetails::Immutable { index } => {
|
||||
(index, true, Aabr {
|
||||
min: Vec2::zero(),
|
||||
// Note texture should always match the cached dimensions
|
||||
// NOTE (as cast): We don't accept images larger than u16::MAX (rejected in
|
||||
// `cache_res`) (and probably would not be able to create a texture this
|
||||
// large).
|
||||
//
|
||||
// Note texture should always match the cached dimensions.
|
||||
max: textures[index].0.get_dimensions().xy().map(|e| e as u16),
|
||||
})
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Attempt to invalidate this cache entry.
|
||||
/// If invalidation is not possible this returns the index of the texture to
|
||||
/// deallocate
|
||||
fn invalidate(&mut self) -> Result<(), usize> {
|
||||
/// Invalidate this cache entry.
|
||||
fn invalidate(&mut self) {
|
||||
match self {
|
||||
Self::Atlas { ref mut valid, .. } => {
|
||||
*valid = false;
|
||||
Ok(())
|
||||
},
|
||||
Self::Texture { ref mut valid, .. } => {
|
||||
*valid = false;
|
||||
Ok(())
|
||||
},
|
||||
Self::Immutable { index } => Err(*index),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Requirements that a particular graphic has with respect to the atlas
|
||||
/// allocation or independent texture it will be stored in.
|
||||
///
|
||||
/// If this matches between an old graphic and a new one which is replacing it,
|
||||
/// we can reuse any of the corresponding locations where it is cached in
|
||||
/// textures on the GPU. That is we can invalidate such textures and upload the
|
||||
/// new graphic there, rather than needing to allocate a new texture (or new
|
||||
/// location in an atlas).
|
||||
#[derive(PartialEq)]
|
||||
enum TextureRequirements {
|
||||
/// These are uploaded to the GPU in the original resolution of the image
|
||||
/// supplied by the `Graphic` and any scaling is done during sampling in
|
||||
/// the UI fragment shader.
|
||||
Fixed {
|
||||
size: Vec2<u16>,
|
||||
/// Graphics with a border color specified are placed into their own
|
||||
/// individual textures so that the border color can be set
|
||||
/// there. (Note: this is partially a theoretical description as
|
||||
/// border color options are limited in the current graphics API).
|
||||
border_color: Option<Rgba<f32>>,
|
||||
},
|
||||
/// These are rasterized to the exact resolution that they will be displayed
|
||||
/// at and then uploaded to the GPU. This corresponds to
|
||||
/// `Graphic::Voxel`. There may be multiple copies on the GPU if
|
||||
/// different resolutions are requested.
|
||||
///
|
||||
/// It is expected that the requested sizes will generally not differ when
|
||||
/// switching out a graphic. Thus, cached dependent graphics should
|
||||
/// always be invalidated since those cached locations will be reusable
|
||||
/// if the requested size is the same.
|
||||
Dependent,
|
||||
}
|
||||
|
||||
/// These solely determine how a place in an atlas will be found or how a
|
||||
/// texture will be created to place the image for a graphic.
|
||||
struct TextureParameters {
|
||||
size: Vec2<u16>,
|
||||
border_color: Option<Rgba<f32>>,
|
||||
}
|
||||
|
||||
/// Key used to refer to an instance of a graphic that has been uploaded to the
|
||||
/// GPU.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
|
||||
struct CacheKey {
|
||||
graphic_id: Id,
|
||||
/// This is `Some` for `TextureRequirements::Dependent`.
|
||||
size: Option<Vec2<u16>>,
|
||||
}
|
||||
|
||||
impl TextureRequirements {
|
||||
fn from_graphic(graphic: &Graphic) -> Option<Self> {
|
||||
match graphic {
|
||||
Graphic::Image(image, border_color) => {
|
||||
// Image sizes over u16::MAX are not supported (and we would probably not be
|
||||
// able to create a texture large enough to hold them on the GPU anyway)!
|
||||
let image_dims = match (u16::try_from(image.width()), u16::try_from(image.height()))
|
||||
{
|
||||
(Ok(x), Ok(y)) if x != 0 && y != 0 => Vec2::new(x, y),
|
||||
_ => {
|
||||
error!(
|
||||
"Image dimensions greater than u16::MAX are not supported! Supplied \
|
||||
image size: ({}, {}).",
|
||||
image.width(),
|
||||
image.height(),
|
||||
);
|
||||
// TODO: reasonable to return None on this error case? We could potentially
|
||||
// validate image sizes on add_graphic/replace_graphic?
|
||||
return None;
|
||||
},
|
||||
};
|
||||
|
||||
Some(Self::Fixed {
|
||||
size: image_dims,
|
||||
border_color: *border_color,
|
||||
})
|
||||
},
|
||||
Graphic::Voxel(_, _, _) => Some(Self::Dependent),
|
||||
Graphic::Blank => None,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: what if requested size is 0? Do we currently panic on this case and
|
||||
// expect caller not to ask for 0 size? (if so document that)
|
||||
fn to_key_and_tex_parameters(
|
||||
self,
|
||||
graphic_id: Id,
|
||||
requested_size: Vec2<u16>,
|
||||
) -> (CacheKey, TextureParameters) {
|
||||
// NOTE: Any external parameters which influence the value of the returned
|
||||
// `TextureParameters` must be included in the `CacheKey`. Otherwise,
|
||||
// invalidation and subsequent re-use of cache locations based on the
|
||||
// value of `self` would be wrong.
|
||||
let (size, border_color, key_size) = match self {
|
||||
Self::Fixed { size, border_color } => (size, border_color, None),
|
||||
Self::Dependent => (requested_size, None, Some(requested_size)),
|
||||
};
|
||||
(
|
||||
CacheKey {
|
||||
graphic_id,
|
||||
size: key_size,
|
||||
},
|
||||
TextureParameters { size, border_color },
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Caches graphics, only deallocates when changing screen resolution (completely
|
||||
// cleared)
|
||||
pub struct GraphicCache {
|
||||
@ -142,27 +235,35 @@ pub struct GraphicCache {
|
||||
/// Next id to use when a new graphic is added
|
||||
next_id: u32,
|
||||
|
||||
/// Atlases with the index of their texture in the textures vec
|
||||
/// Atlases with the index of their texture in the textures slab.
|
||||
atlases: Vec<(SimpleAtlasAllocator, usize)>,
|
||||
textures: Slab<(Texture, UiTextureBindGroup)>,
|
||||
/// Third tuple element is a list of pending premultiply + upload operations
|
||||
/// for this frame. The purpose of this is to collect all the operations
|
||||
/// together so that a single renderpass is performed for each target
|
||||
/// texture.
|
||||
textures: Slab<(Texture, UiTextureBindGroup, Vec<UiPremultiplyUpload>)>,
|
||||
/// The location and details of graphics cached on the GPU.
|
||||
///
|
||||
/// Graphic::Voxel images include the dimensions they were rasterized at in
|
||||
/// the key. Other images are scaled as part of sampling them on the
|
||||
/// GPU.
|
||||
cache_map: HashMap<(Id, Option<Vec2<u16>>), CachedDetails>,
|
||||
cache_map: HashMap<CacheKey, CachedDetails>,
|
||||
|
||||
keyed_jobs: KeyedJobs<(Id, Option<Vec2<u16>>), (RgbaImage, Option<Rgba<f32>>)>,
|
||||
keyed_jobs: KeyedJobs<CacheKey, RgbaImage>,
|
||||
}
|
||||
|
||||
impl GraphicCache {
|
||||
pub fn new(renderer: &mut Renderer) -> Self {
|
||||
let (atlas, texture) = create_atlas_texture(renderer);
|
||||
let (atlas, (tex, bind)) = create_atlas_texture(renderer);
|
||||
|
||||
let mut textures = Slab::new();
|
||||
let tex_id = textures.insert((tex, bind, Vec::new()));
|
||||
|
||||
Self {
|
||||
graphic_map: HashMap::default(),
|
||||
next_id: 0,
|
||||
atlases: vec![(atlas, 0)],
|
||||
textures: core::iter::once((0, texture)).collect(),
|
||||
atlases: vec![(atlas, tex_id)],
|
||||
textures,
|
||||
cache_map: HashMap::default(),
|
||||
keyed_jobs: KeyedJobs::new("IMAGE_PROCESSING"),
|
||||
}
|
||||
@ -179,29 +280,64 @@ impl GraphicCache {
|
||||
}
|
||||
|
||||
pub fn replace_graphic(&mut self, id: Id, graphic: Graphic) {
|
||||
if self.graphic_map.insert(id, graphic).is_none() {
|
||||
// This was not an update, so no need to search for keys.
|
||||
return;
|
||||
}
|
||||
let (old, new) = match self.graphic_map.entry(id) {
|
||||
Entry::Occupied(o) => {
|
||||
let slot_mut = o.into_mut();
|
||||
let old = core::mem::replace(slot_mut, graphic);
|
||||
(old, slot_mut)
|
||||
},
|
||||
Entry::Vacant(v) => {
|
||||
// This was not an update, so no need to cleanup caches.
|
||||
v.insert(graphic);
|
||||
return;
|
||||
},
|
||||
};
|
||||
|
||||
// Remove from caches
|
||||
let old_requirements = TextureRequirements::from_graphic(&old);
|
||||
let new_requirements = TextureRequirements::from_graphic(&new);
|
||||
let should_invalidate = old_requirements == new_requirements && old_requirements.is_some();
|
||||
|
||||
// Invalidate if possible or remove from caches.
|
||||
// Maybe make this more efficient if replace graphic is used more often
|
||||
self.cache_map.retain(|&(key_id, _), details| {
|
||||
// If the entry does not reference id, or it does but we can successfully
|
||||
// invalidate, retain the entry; otherwise, discard this entry completely.
|
||||
key_id != id
|
||||
|| details
|
||||
.invalidate()
|
||||
.map_err(|index| self.textures.remove(index))
|
||||
.is_ok()
|
||||
});
|
||||
// (especially since we should know the exact key for non-voxel
|
||||
// graphics).
|
||||
//
|
||||
// NOTE: at the time of writing, replace_graphic is only used for voxel minimap
|
||||
// updates and item image reloading.
|
||||
if should_invalidate {
|
||||
self.cache_map.iter_mut().for_each(|(key, details)| {
|
||||
if key.graphic_id == id {
|
||||
details.invalidate();
|
||||
}
|
||||
});
|
||||
} else {
|
||||
self.cache_map.drain_filter(|key, details| {
|
||||
if key.graphic_id == id {
|
||||
match details {
|
||||
// TODO: if replace_graphic is used continuously for small images (i.e.
|
||||
// images placed into an atlas) of different sizes, that can use up our
|
||||
// atlas space since spots in the atlas can't be reused. (this scenario is
|
||||
// now possible with scaling being done during sampling rather than placing
|
||||
// resized version into the atlas)
|
||||
CachedDetails::Atlas { .. } => {},
|
||||
CachedDetails::Texture { index, .. } => {
|
||||
self.textures.remove(*index);
|
||||
},
|
||||
};
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_graphic(&self, id: Id) -> Option<&Graphic> { self.graphic_map.get(&id) }
|
||||
|
||||
/// Used to acquire textures for rendering
|
||||
pub fn get_tex(&self, id: TexId) -> &(Texture, UiTextureBindGroup) {
|
||||
self.textures.get(id.0).expect("Invalid TexId used")
|
||||
pub fn get_tex(&self, id: TexId) -> (&Texture, &UiTextureBindGroup) {
|
||||
let (tex, bind, _uploads) = self.textures.get(id.0).expect("Invalid TexId used");
|
||||
(tex, bind)
|
||||
}
|
||||
|
||||
pub fn get_graphic_dims(&self, (id, rot): (Id, Rotation)) -> Option<(u32, u32)> {
|
||||
@ -230,20 +366,28 @@ impl GraphicCache {
|
||||
pub fn clear_cache(&mut self, renderer: &mut Renderer) {
|
||||
self.cache_map.clear();
|
||||
|
||||
let (atlas, texture) = create_atlas_texture(renderer);
|
||||
self.atlases = vec![(atlas, 0)];
|
||||
self.textures = core::iter::once((0, texture)).collect();
|
||||
let (atlas, (tex, bind)) = create_atlas_texture(renderer);
|
||||
let mut textures = Slab::new();
|
||||
let tex_id = textures.insert((tex, bind, Vec::new()));
|
||||
self.atlases = vec![(atlas, tex_id)];
|
||||
self.textures = textures;
|
||||
}
|
||||
|
||||
/// Source rectangle should be from 0 to 1, and represents a bounding box
|
||||
/// for the source image of the graphic.
|
||||
///
|
||||
/// [`complete_premultiply_uploads`](Self::complete_premultiply_uploads)
|
||||
/// needs to be called to finalize updates on the GPU that are initiated
|
||||
/// here. Thus, ideally that would be called before drawing UI elements
|
||||
/// using the images cached here.
|
||||
pub fn cache_res(
|
||||
&mut self,
|
||||
renderer: &mut Renderer,
|
||||
pool: Option<&SlowJobPool>,
|
||||
graphic_id: Id,
|
||||
// TODO: if we aren't resizing here we can upload image earlier... (as long as this doesn't
|
||||
// lead to uploading too much unused stuff).
|
||||
// TODO: if we aren't resizing here we can potentially upload the image earlier... (as long
|
||||
// as this doesn't lead to uploading too much unused stuff). (currently not sure whether it
|
||||
// would be an overall gain to pursue this.)
|
||||
requested_dims: Vec2<u16>,
|
||||
source: Aabr<f64>,
|
||||
rotation: Rotation,
|
||||
@ -290,6 +434,7 @@ impl GraphicCache {
|
||||
// S-TODO: A bit hacky inserting this here, just to get things working initially
|
||||
let scale = requested_dims_upright.map2(
|
||||
Vec2::from(scaled.size()),
|
||||
// S-TODO div by zero potential? If so, is NaN an issue in that case?
|
||||
|screen_pixels, sample_pixels: f64| screen_pixels as f32 / sample_pixels as f32,
|
||||
);
|
||||
let transformed = rotated_aabr(scaled);
|
||||
@ -315,13 +460,9 @@ impl GraphicCache {
|
||||
},
|
||||
};
|
||||
|
||||
let key = (
|
||||
graphic_id,
|
||||
// Dimensions only included in the key for voxel graphics which we rasterize at the
|
||||
// size that they will be displayed at (other images are scaled when sampling them on
|
||||
// the GPU).
|
||||
matches!(graphic, Graphic::Voxel { .. }).then(|| requested_dims_upright),
|
||||
);
|
||||
let requirements = TextureRequirements::from_graphic(&graphic)?;
|
||||
let (key, texture_parameters) =
|
||||
requirements.to_key_and_tex_parameters(graphic_id, requested_dims_upright);
|
||||
|
||||
let details = match cache_map.entry(key) {
|
||||
Entry::Occupied(details) => {
|
||||
@ -332,20 +473,23 @@ impl GraphicCache {
|
||||
// graphic
|
||||
if !valid {
|
||||
// Create image
|
||||
let (image, border) = prepare_graphic(
|
||||
let image = prepare_graphic(
|
||||
graphic,
|
||||
graphic_id,
|
||||
key,
|
||||
requested_dims_upright,
|
||||
false,
|
||||
&mut self.keyed_jobs,
|
||||
pool,
|
||||
)?;
|
||||
// If the cache location is invalid, we know the underlying texture is mutable,
|
||||
// so we should be able to replace the graphic. However, we still want to make
|
||||
// sure that we are not reusing textures for images that specify a border
|
||||
// color.
|
||||
assert!(border.is_none());
|
||||
// Ensure we don't have any bugs causing the size used to determine if the
|
||||
// cached version is reusable to not match the size of the image produced by
|
||||
// prepare_graphic.
|
||||
assert_eq!(
|
||||
image.dimensions(),
|
||||
texture_parameters.size.map(u32::from).into_tuple()
|
||||
);
|
||||
// Transfer to the gpu
|
||||
upload_image(renderer, aabr, &textures[idx].0, &image);
|
||||
upload_image(renderer, aabr, &mut textures[idx].2, &image);
|
||||
}
|
||||
|
||||
return Some((transformed_aabr(aabr.map(|e| e as f64)), TexId(idx)));
|
||||
@ -354,62 +498,49 @@ impl GraphicCache {
|
||||
};
|
||||
|
||||
// Construct image in an optional threadpool.
|
||||
let (image, border_color) = prepare_graphic(
|
||||
let image = prepare_graphic(
|
||||
graphic,
|
||||
graphic_id,
|
||||
key,
|
||||
requested_dims_upright,
|
||||
false,
|
||||
&mut self.keyed_jobs,
|
||||
pool,
|
||||
)?;
|
||||
// Assert dimensions of image from `prepare_graphic` are as expected!
|
||||
assert_eq!(
|
||||
image.dimensions(),
|
||||
texture_parameters.size.map(u32::from).into_tuple()
|
||||
);
|
||||
// Image dimensions in the format used by the allocator crate.
|
||||
let image_dims_size2d = size2(
|
||||
i32::from(texture_parameters.size.x),
|
||||
i32::from(texture_parameters.size.y),
|
||||
);
|
||||
|
||||
// Image sizes over u16::MAX are not supported (and we would probably not be
|
||||
// able to create a texture large enough to hold them on the GPU anyway)!
|
||||
let image_dims = match {
|
||||
let (x, y) = image.dimensions();
|
||||
(u16::try_from(x), u16::try_from(y))
|
||||
} {
|
||||
(Ok(x), Ok(y)) => Vec2::new(x, y),
|
||||
_ => {
|
||||
error!(
|
||||
"Image dimensions greater than u16::MAX are not supported! Supplied image \
|
||||
size: {:?}.",
|
||||
image.dimensions()
|
||||
);
|
||||
return None;
|
||||
},
|
||||
};
|
||||
// Now we allocate space on the gpu (either in an atlas or an independent
|
||||
// texture) and upload the image to that location.
|
||||
|
||||
// Upload
|
||||
let atlas_size = atlas_size(renderer);
|
||||
|
||||
// Allocate space on the gpu.
|
||||
//
|
||||
// Graphics with a border color.
|
||||
let location = if let Some(border_color) = border_color {
|
||||
// Create a new immutable texture.
|
||||
let texture = create_image(renderer, image, border_color);
|
||||
// NOTE: All mutations happen only after the upload succeeds!
|
||||
let index = textures.insert(texture);
|
||||
CachedDetails::Immutable { index }
|
||||
// Graphics over a particular size compared to the atlas size are sent
|
||||
// to their own textures. Here we check for ones under that
|
||||
// size.
|
||||
} else if atlas_size
|
||||
.map2(image_dims, |a, d| a as f32 * ATLAS_CUTOFF_FRAC >= d as f32)
|
||||
.reduce_and()
|
||||
{
|
||||
// Graphics that request a border color or which are over a particular size
|
||||
// compared to the atlas size are sent to their own textures.
|
||||
let can_place_in_atlas = texture_parameters.border_color.is_none()
|
||||
&& atlas_size
|
||||
.map2(texture_parameters.size, |a, d| {
|
||||
a as f32 * ATLAS_CUTOFF_FRAC >= d as f32
|
||||
})
|
||||
.reduce_and();
|
||||
let location = if can_place_in_atlas {
|
||||
// Fit into an atlas
|
||||
let mut loc = None;
|
||||
for (atlas_idx, &mut (ref mut atlas, texture_idx)) in atlases.iter_mut().enumerate() {
|
||||
let clamped_dims = image_dims.map(|e| i32::from(e.max(1)));
|
||||
if let Some(rectangle) = atlas.allocate(size2(clamped_dims.x, clamped_dims.y)) {
|
||||
if let Some(rectangle) = atlas.allocate(image_dims_size2d) {
|
||||
let aabr = aabr_from_alloc_rect(rectangle);
|
||||
loc = Some(CachedDetails::Atlas {
|
||||
atlas_idx,
|
||||
valid: true,
|
||||
aabr,
|
||||
});
|
||||
upload_image(renderer, aabr, &textures[texture_idx].0, &image);
|
||||
upload_image(renderer, aabr, &mut textures[texture_idx].2, &image);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -418,17 +549,16 @@ impl GraphicCache {
|
||||
Some(loc) => loc,
|
||||
// Create a new atlas
|
||||
None => {
|
||||
let (mut atlas, texture) = create_atlas_texture(renderer);
|
||||
let clamped_dims = image_dims.map(|e| i32::from(e.max(1)));
|
||||
let (mut atlas, (tex, bind)) = create_atlas_texture(renderer);
|
||||
let aabr = atlas
|
||||
.allocate(size2(clamped_dims.x, clamped_dims.y))
|
||||
.allocate(image_dims_size2d)
|
||||
.map(aabr_from_alloc_rect)
|
||||
.unwrap();
|
||||
// NOTE: All mutations happen only after the texture creation succeeds!
|
||||
let tex_idx = textures.insert(texture);
|
||||
let tex_idx = textures.insert((tex, bind, Vec::new()));
|
||||
let atlas_idx = atlases.len();
|
||||
atlases.push((atlas, tex_idx));
|
||||
upload_image(renderer, aabr, &textures[tex_idx].0, &image);
|
||||
upload_image(renderer, aabr, &mut textures[tex_idx].2, &image);
|
||||
CachedDetails::Atlas {
|
||||
atlas_idx,
|
||||
valid: true,
|
||||
@ -438,23 +568,11 @@ impl GraphicCache {
|
||||
}
|
||||
} else {
|
||||
// Create a texture just for this
|
||||
let texture = {
|
||||
let tex = renderer.create_dynamic_texture(image_dims.map(u32::from));
|
||||
let bind = renderer.ui_bind_texture(&tex);
|
||||
(tex, bind)
|
||||
};
|
||||
// NOTE: All mutations happen only after the texture creation succeeds!
|
||||
let index = textures.insert(texture);
|
||||
upload_image(
|
||||
renderer,
|
||||
Aabr {
|
||||
min: Vec2::zero(),
|
||||
// Note texture should always match the cached dimensions
|
||||
max: image_dims,
|
||||
},
|
||||
&textures[index].0,
|
||||
&image,
|
||||
);
|
||||
let (tex, bind, uploads) = create_image(renderer, &image, texture_parameters);
|
||||
// NOTE: All mutations happen only after the texture creation and upload
|
||||
// initiation succeeds! (completing the upload does not have any failure cases
|
||||
// afaik)
|
||||
let index = textures.insert((tex, bind, uploads));
|
||||
CachedDetails::Texture { index, valid: true }
|
||||
};
|
||||
|
||||
@ -466,54 +584,77 @@ impl GraphicCache {
|
||||
|
||||
Some((transformed_aabr(aabr.map(|e| e as f64)), TexId(idx)))
|
||||
}
|
||||
|
||||
/// Runs render passes with alpha premultiplication pipeline to complete any
|
||||
/// pending uploads.
|
||||
///
|
||||
/// This should be called before starting the pass where the ui is rendered.
|
||||
pub fn complete_premultiply_uploads(&mut self, drawer: &mut crate::render::Drawer<'_>) {
|
||||
drawer.run_ui_premultiply_passes(
|
||||
self.textures
|
||||
.iter_mut()
|
||||
.map(|(_tex_id, (texture, _, uploads))| (&*texture, core::mem::take(uploads))),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Prepare the graphic into the form that will be uploaded to the GPU.
|
||||
///
|
||||
/// For voxel graphics, draws the graphic at the specified dimensions.
|
||||
///
|
||||
/// Also pre-multiplies alpha in images so they can be linearly filtered on the
|
||||
/// GPU.
|
||||
fn prepare_graphic(
|
||||
graphic: &Graphic,
|
||||
graphic_id: Id,
|
||||
/// Can also pre-multiply alpha in images so they can be linearly filtered on
|
||||
/// the GPU (this is optional since we also have a path to do this
|
||||
/// premultiplication on the GPU).
|
||||
fn prepare_graphic<'graphic>(
|
||||
graphic: &'graphic Graphic,
|
||||
cache_key: CacheKey,
|
||||
dims: Vec2<u16>,
|
||||
keyed_jobs: &mut KeyedJobs<(Id, Option<Vec2<u16>>), (RgbaImage, Option<Rgba<f32>>)>,
|
||||
premultiply_on_cpu: bool, // TODO: currently unused
|
||||
keyed_jobs: &mut KeyedJobs<CacheKey, RgbaImage>,
|
||||
pool: Option<&SlowJobPool>,
|
||||
) -> Option<(RgbaImage, Option<Rgba<f32>>)> {
|
||||
) -> Option<Cow<'graphic, RgbaImage>> {
|
||||
match graphic {
|
||||
// Short-circuit spawning a job on the threadpool for blank graphics
|
||||
Graphic::Blank => None,
|
||||
// Dimensions are only included in the key for Graphic::Voxel since otherwise we will
|
||||
// resize on the GPU.
|
||||
Graphic::Image(image, border_color) => keyed_jobs
|
||||
.spawn(pool, (graphic_id, None), || {
|
||||
let image = Arc::clone(image);
|
||||
let border_color = *border_color;
|
||||
move |_| {
|
||||
// Image will be rescaled when sampling from it on the GPU so we don't
|
||||
// need to resize it here.
|
||||
let mut image = image.to_rgba8();
|
||||
// TODO: could potentially do this when loading the image and for voxel
|
||||
// images maybe at some point in the `draw_vox` processing. Or we could
|
||||
// push it in the other direction and do conversion on the GPU.
|
||||
premultiply_alpha(&mut image);
|
||||
(image, border_color)
|
||||
}
|
||||
})
|
||||
.map(|(_, v)| v),
|
||||
Graphic::Image(image, _border_color) => {
|
||||
if premultiply_on_cpu {
|
||||
keyed_jobs
|
||||
.spawn(pool, cache_key, || {
|
||||
let image = Arc::clone(image);
|
||||
move |_| {
|
||||
// Image will be rescaled when sampling from it on the GPU so we don't
|
||||
// need to resize it here.
|
||||
let mut image = image.to_rgba8();
|
||||
// TODO: could potentially do this when loading the image and for voxel
|
||||
// images maybe at some point in the `draw_vox` processing. Or we could
|
||||
// push it in the other direction and do conversion on the GPU.
|
||||
premultiply_alpha(&mut image);
|
||||
image
|
||||
}
|
||||
})
|
||||
.map(|(_, v)| Cow::Owned(v))
|
||||
} else if let Some(rgba) = image.as_rgba8() {
|
||||
Some(Cow::Borrowed(rgba))
|
||||
} else {
|
||||
// TODO: we should require rgba8 format
|
||||
warn!("Non-rgba8 image in UI used this may be deprecated.");
|
||||
Some(Cow::Owned(image.to_rgba8()))
|
||||
}
|
||||
},
|
||||
Graphic::Voxel(segment, trans, sample_strat) => keyed_jobs
|
||||
.spawn(pool, (graphic_id, Some(dims)), || {
|
||||
.spawn(pool, cache_key, || {
|
||||
let segment = Arc::clone(segment);
|
||||
let (trans, sample_strat) = (*trans, *sample_strat);
|
||||
move |_| {
|
||||
// Render voxel model at requested resolution
|
||||
let mut image = renderer::draw_vox(&segment, dims, trans, sample_strat);
|
||||
premultiply_alpha(&mut image);
|
||||
(image, None)
|
||||
if premultiply_on_cpu {
|
||||
premultiply_alpha(&mut image);
|
||||
}
|
||||
image
|
||||
}
|
||||
})
|
||||
.map(|(_, v)| v),
|
||||
.map(|(_, v)| Cow::Owned(v)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -525,19 +666,52 @@ fn atlas_size(renderer: &Renderer) -> Vec2<u32> {
|
||||
.map(|e| (e * GRAPHIC_CACHE_RELATIVE_SIZE).clamp(512, max_texture_size))
|
||||
}
|
||||
|
||||
/// This creates a texture suitable for sampling from during the UI pass and
|
||||
/// rendering to during alpha premultiplication upload passes.
|
||||
fn create_image_texture(
|
||||
renderer: &mut Renderer,
|
||||
size: Vec2<u32>,
|
||||
address_mode: Option<wgpu::AddressMode>,
|
||||
) -> (Texture, UiTextureBindGroup) {
|
||||
let tex_info = wgpu::TextureDescriptor {
|
||||
label: None,
|
||||
size: wgpu::Extent3d {
|
||||
width: size.x,
|
||||
height: size.y,
|
||||
depth_or_array_layers: 1,
|
||||
},
|
||||
mip_level_count: 1,
|
||||
sample_count: 1,
|
||||
dimension: wgpu::TextureDimension::D2,
|
||||
format: wgpu::TextureFormat::Rgba8UnormSrgb,
|
||||
usage: wgpu::TextureUsage::RENDER_ATTACHMENT | wgpu::TextureUsage::SAMPLED,
|
||||
};
|
||||
let view_info = wgpu::TextureViewDescriptor {
|
||||
format: Some(tex_info.format),
|
||||
dimension: Some(wgpu::TextureViewDimension::D2),
|
||||
..Default::default()
|
||||
};
|
||||
let address_mode = address_mode.unwrap_or(wgpu::AddressMode::ClampToEdge);
|
||||
let sampler_info = wgpu::SamplerDescriptor {
|
||||
address_mode_u: address_mode,
|
||||
address_mode_v: address_mode,
|
||||
mag_filter: wgpu::FilterMode::Linear,
|
||||
min_filter: wgpu::FilterMode::Linear,
|
||||
..Default::default()
|
||||
};
|
||||
let tex = renderer.create_texture_raw(&tex_info, &view_info, &sampler_info);
|
||||
let bind = renderer.ui_bind_texture(&tex);
|
||||
(tex, bind)
|
||||
}
|
||||
|
||||
fn create_atlas_texture(
|
||||
renderer: &mut Renderer,
|
||||
) -> (SimpleAtlasAllocator, (Texture, UiTextureBindGroup)) {
|
||||
let size = atlas_size(renderer);
|
||||
// Note: here we assume the max texture size is under i32::MAX.
|
||||
let atlas = SimpleAtlasAllocator::new(size2(size.x as i32, size.y as i32));
|
||||
let texture = {
|
||||
let tex = renderer.create_dynamic_texture(size);
|
||||
let bind = renderer.ui_bind_texture(&tex);
|
||||
(tex, bind)
|
||||
};
|
||||
|
||||
(atlas, texture)
|
||||
let (tex, bind) = create_image_texture(renderer, size, None);
|
||||
(atlas, (tex, bind))
|
||||
}
|
||||
|
||||
fn aabr_from_alloc_rect(rect: guillotiere::Rectangle) -> Aabr<u16> {
|
||||
@ -550,37 +724,49 @@ fn aabr_from_alloc_rect(rect: guillotiere::Rectangle) -> Aabr<u16> {
|
||||
}
|
||||
}
|
||||
|
||||
fn upload_image(renderer: &mut Renderer, aabr: Aabr<u16>, tex: &Texture, image: &RgbaImage) {
|
||||
fn upload_image(
|
||||
renderer: &mut Renderer,
|
||||
aabr: Aabr<u16>,
|
||||
target_texture_uploads: &mut Vec<UiPremultiplyUpload>,
|
||||
image: &RgbaImage,
|
||||
) {
|
||||
let aabr = aabr.map(u32::from);
|
||||
// Check that this image and the target aabr are the same size (otherwise there
|
||||
// is a bug in this module).
|
||||
debug_assert_eq!(aabr.size().into_tuple(), image.dimensions());
|
||||
let offset = aabr.min.into_array();
|
||||
let size = aabr.size().into_array();
|
||||
renderer.update_texture(
|
||||
tex,
|
||||
offset,
|
||||
size,
|
||||
// NOTE: Rgba texture, so each pixel is 4 bytes, ergo this cannot fail.
|
||||
// We make the cast parameters explicit for clarity.
|
||||
bytemuck::cast_slice::<u8, [u8; 4]>(image),
|
||||
);
|
||||
|
||||
// TODO: can we transparently have cpu based version behind this (actually this
|
||||
// would introduce more complexity to be able to do it in the background,
|
||||
// but we could do it not in the background here, especially for smaller
|
||||
// things this would work well)
|
||||
let upload = UiPremultiplyUpload::prepare(renderer, image, offset);
|
||||
target_texture_uploads.push(upload);
|
||||
//todo!()
|
||||
}
|
||||
|
||||
// This is used for border_color.is_some() images (ie the map image).
|
||||
fn create_image(
|
||||
renderer: &mut Renderer,
|
||||
image: RgbaImage,
|
||||
_border_color: Rgba<f32>, // See TODO below
|
||||
) -> (Texture, UiTextureBindGroup) {
|
||||
let tex = renderer
|
||||
.create_texture(
|
||||
&DynamicImage::ImageRgba8(image),
|
||||
Some(wgpu::FilterMode::Linear),
|
||||
image: &RgbaImage,
|
||||
texture_parameters: TextureParameters,
|
||||
) -> (Texture, UiTextureBindGroup, Vec<UiPremultiplyUpload>) {
|
||||
let (tex, bind) = create_image_texture(
|
||||
renderer,
|
||||
texture_parameters.size.map(u32::from),
|
||||
texture_parameters
|
||||
.border_color
|
||||
// TODO: either use the desktop only border color or just emulate this
|
||||
// Some(border_color.into_array().into()),
|
||||
Some(wgpu::AddressMode::ClampToBorder),
|
||||
)
|
||||
.expect("create_texture only panics if non ImageRbga8 is passed");
|
||||
let bind = renderer.ui_bind_texture(&tex);
|
||||
|
||||
(tex, bind)
|
||||
//.map(|c| c.into_array().into()),
|
||||
.map(|_| wgpu::AddressMode::ClampToBorder),
|
||||
);
|
||||
let mut uploads = Vec::new();
|
||||
let aabr = Aabr {
|
||||
min: Vec2::zero(),
|
||||
max: texture_parameters.size,
|
||||
};
|
||||
upload_image(renderer, aabr, &mut uploads, image);
|
||||
(tex, bind, uploads)
|
||||
}
|
||||
|
||||
fn premultiply_alpha(image: &mut RgbaImage) {
|
||||
@ -592,7 +778,7 @@ fn premultiply_alpha(image: &mut RgbaImage) {
|
||||
// https://github.com/image-rs/image/blob/a1ce569afd476e881acafdf9e7a5bce294d0db9a/src/buffer.rs#L664
|
||||
let dims = image.dimensions();
|
||||
let image_buffer_len = dims.0 as usize * dims.1 as usize * 4;
|
||||
let (arrays, end) = image[..image_buffer_len].as_chunks_mut::<{ 4 * 4 }>();
|
||||
let (arrays, end) = (&mut **image)[..image_buffer_len].as_chunks_mut::<{ 4 * 4 }>();
|
||||
// Rgba8 has 4 bytes per pixel, so there should be no remainder when dividing by 4.
|
||||
let (end, _) = end.as_chunks_mut::<4>();
|
||||
end.iter_mut().for_each(|pixel| {
|
||||
@ -637,3 +823,8 @@ fn premultiply_alpha(image: &mut RgbaImage) {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Next step: Handling invalidation / removal of old textures when
|
||||
// replace_graphic is used under new resizing scheme.
|
||||
//
|
||||
// TODO: does screenshot texture have COPY_DST? I don't think it needs this.
|
||||
|
@ -61,7 +61,9 @@ impl Cache {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn glyph_cache_tex(&self) -> &(Texture, UiTextureBindGroup) { &self.glyph_cache_tex }
|
||||
pub fn glyph_cache_tex(&self) -> (&Texture, &UiTextureBindGroup) {
|
||||
(&self.glyph_cache_tex.0, &self.glyph_cache_tex.1)
|
||||
}
|
||||
|
||||
pub fn glyph_cache_mut_and_tex(&mut self) -> (&mut GlyphBrush, &(Texture, UiTextureBindGroup)) {
|
||||
(self.glyph_brush.get_mut(), &self.glyph_cache_tex)
|
||||
|
@ -791,7 +791,7 @@ impl IcedRenderer {
|
||||
DrawKind::Image(tex_id) => self.cache.graphic_cache().get_tex(*tex_id),
|
||||
DrawKind::Plain => self.cache.glyph_cache_tex(),
|
||||
};
|
||||
drawer.draw(&tex.1, verts.clone()); // Note: trivial clone
|
||||
drawer.draw(tex.1, verts.clone()); // Note: trivial clone
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -1073,7 +1073,7 @@ impl Ui {
|
||||
DrawKind::Image(tex_id) => self.cache.graphic_cache().get_tex(*tex_id),
|
||||
DrawKind::Plain => self.cache.glyph_cache_tex(),
|
||||
};
|
||||
drawer.draw(&tex.1, verts.clone()); // Note: trivial clone
|
||||
drawer.draw(tex.1, verts.clone()); // Note: trivial clone
|
||||
},
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user