Start attempt to premultiply alpha on the GPU.

This commit is contained in:
Imbris 2022-09-08 01:04:41 -04:00
parent 7538b04348
commit 1d51aae3b2
2 changed files with 77 additions and 0 deletions

View File

@ -0,0 +1,36 @@
#version 420 core
// Compute shader interface: a read-only buffer of packed source pixels, a
// uniform block describing where the image goes, and the write-only target
// image that `main` stores into.
// TODO: should we modify this based on the current device?
// TODO: would it be better to have 2D workgroup for writing to a local area in the target image?
// One-dimensional workgroups of 256 invocations; `main` maps each
// invocation's global x index to a single source pixel.
layout(local_size_x = 256) in;
// TODO: writing all images into a single buffer?
// Source pixels, one `uint` per pixel, indexed by `main` in row-major order
// (index = y * image_size.x + x).
layout(set = 0, binding = 0) readonly buffer InputImage {
uint input_pixels[];
};
layout (std140, set = 0, binding = 1)
uniform u_locals {
// Size of the input image.
// `main` uses x as the row width and y as the row count.
uvec2 image_size;
// Offset to place the transformed input image at in the target
// image.
uvec2 target_offset;
};
// Destination image; declared `writeonly`, so it can only be stored to.
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D target_image;
// Decodes one sRGB-encoded channel in [0, 1] to linear light using the
// piecewise sRGB transfer function.
float srgb_to_linear(float srgb) {
    return srgb <= 0.04045 ? srgb / 12.92 : pow((srgb + 0.055) / 1.055, 2.4);
}

// Encodes one linear-light channel in [0, 1] back to sRGB (inverse of
// `srgb_to_linear`).
float linear_to_srgb(float lin) {
    return lin <= 0.0031308 ? lin * 12.92 : 1.055 * pow(lin, 1.0 / 2.4) - 0.055;
}

// Premultiplies one source pixel by its alpha and writes it to the target
// image at `target_offset` + the pixel's position within the source image.
//
// Each invocation handles the pixel whose row-major index equals its global x
// invocation index; invocations past the end of the image do nothing.
void main() {
    // A zero-width image has no pixels, and would make the index math below
    // divide by zero.
    if (image_size.x == 0u) {
        return;
    }

    uint global_id = gl_GlobalInvocationID.x;
    uvec2 src_pixel_pos = uvec2(global_id % image_size.x, global_id / image_size.x);

    // Otherwise this is an out of bounds compute instance (the dispatch is
    // rounded up to a whole number of workgroups).
    if (src_pixel_pos.y < image_size.y) {
        uint pixel = input_pixels[global_id];

        // Unpack the four 8-bit channels into normalized floats; the least
        // significant byte becomes the first component.
        // NOTE(review): assumes the buffer holds tightly packed RGBA8 bytes in
        // R, G, B, A memory order - confirm against the upload code.
        vec4 nonlinear = unpackUnorm4x8(pixel);

        // Premultiplication has to happen in linear space, so decode the color
        // channels first (alpha is already linear).
        vec3 linear_rgb = vec3(
            srgb_to_linear(nonlinear.r),
            srgb_to_linear(nonlinear.g),
            srgb_to_linear(nonlinear.b)
        );
        vec3 premultiplied_linear = linear_rgb * nonlinear.a;

        // No free srgb with image store operations https://www.khronos.org/opengl/wiki/Image_Load_Store#Format_compatibility
        // so re-encode to sRGB by hand before storing.
        vec4 premultiplied_nonlinear = vec4(
            linear_to_srgb(premultiplied_linear.r),
            linear_to_srgb(premultiplied_linear.g),
            linear_to_srgb(premultiplied_linear.b),
            nonlinear.a
        );

        // `imageStore` on an `image2D` takes signed integer coordinates.
        imageStore(target_image, ivec2(src_pixel_pos + target_offset), premultiplied_nonlinear);
    }
}

View File

@ -394,3 +394,44 @@ pub fn create_tri(
v([tri[2][0], tri[2][1]], [uv_tri[2][0], uv_tri[2][1]]),
)
}
// Steps:
// 1. Upload new image via `Device::create_buffer_init`, with `MAP_WRITE` flag
// to avoid staging buffer.
// 2. Run compute pipeline to multiply by alpha reading from this buffer and
// writing to the final texture (this may be in an atlas or an independent
// texture if the image is over a certain size threshold).
//
// Info needed in compute shader:
// * source buffer
// * target texture
// * image dimensions
// * position in the target texture
// (what is the overhead of compute call? at some point we may be better off
// converting small images on the cpu)
/// Compute pipeline that runs the premultiply-alpha shader.
pub struct PremultiplyAlphaPipeline {
    /// The pipeline object produced by `Device::create_compute_pipeline`.
    pub pipeline: wgpu::ComputePipeline,
}

impl PremultiplyAlphaPipeline {
    /// Builds the premultiply-alpha compute pipeline.
    ///
    /// * `device` - device used to create the pipeline layout and pipeline.
    /// * `module` - shader module containing the compute entry point `main`.
    /// * `layout` - bind group layout used as the pipeline's only bind group
    ///   (set 0).
    pub fn new(
        device: &wgpu::Device,
        module: &wgpu::ShaderModule,
        // NOTE(review): `PremultiplAlphaLayout` looks like a typo for
        // `PremultiplyAlphaLayout`; its definition is not visible here, so the
        // name is kept as-is. Confirm against the type's declaration.
        layout: &PremultiplAlphaLayout,
    ) -> Self {
        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Premultiply alpha pipeline layout"),
            push_constant_ranges: &[],
            // NOTE(review): this slice must contain `&wgpu::BindGroupLayout`;
            // if the layout type is a wrapper, pass its inner layout field
            // here instead - confirm against the type's declaration.
            bind_group_layouts: &[layout],
        });

        // This is a compute pipeline, so it takes a compute (not render)
        // pipeline descriptor.
        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Premultiply alpha pipeline"),
            layout: Some(&pipeline_layout),
            module,
            entry_point: "main",
        });

        Self { pipeline }
    }
}