diff --git a/assets/voxygen/shaders/premultiply-alpha-compute.glsl b/assets/voxygen/shaders/premultiply-alpha-compute.glsl
new file mode 100644
index 0000000000..36d11c2f30
--- /dev/null
+++ b/assets/voxygen/shaders/premultiply-alpha-compute.glsl
@@ -0,0 +1,62 @@
+#version 420 core
+
+// TODO: should we modify this based on the current device?
+// TODO: would it be better to have 2D workgroup for writing to a local area in the target image?
+layout(local_size_x = 256) in;
+
+// TODO: writing all images into a single buffer?
+// Source pixels, one packed 32-bit word per pixel, indexed in row-major order.
+layout(set = 0, binding = 0) readonly buffer InputImage {
+    uint input_pixels[];
+};
+
+layout (std140, set = 0, binding = 1)
+uniform u_locals {
+    // Size of the input image.
+    uvec2 image_size;
+    // Offset to place the transformed input image at in the target
+    // image.
+    uvec2 target_offset;
+};
+
+layout(rgba8, set = 0, binding = 2) uniform writeonly image2D target_image;
+
+// Decodes sRGB-encoded channels in [0, 1] to linear values.
+vec3 srgb_to_linear(vec3 srgb) {
+    bvec3 cutoff = lessThan(srgb, vec3(0.04045));
+    vec3 higher = pow((srgb + vec3(0.055)) / vec3(1.055), vec3(2.4));
+    vec3 lower = srgb / vec3(12.92);
+    return mix(higher, lower, cutoff);
+}
+
+// Encodes linear channels in [0, 1] as sRGB.
+vec3 linear_to_srgb(vec3 col) {
+    bvec3 cutoff = lessThan(col, vec3(0.0031308));
+    vec3 higher = vec3(1.055) * pow(col, vec3(1.0 / 2.4)) - vec3(0.055);
+    vec3 lower = col * vec3(12.92);
+    return mix(higher, lower, cutoff);
+}
+
+void main() {
+    uint global_id = gl_GlobalInvocationID.x;
+    uvec2 src_pixel_pos = uvec2(global_id % image_size.x, global_id / image_size.x);
+    // Otherwise this is an out of bounds compute instance.
+    if (src_pixel_pos.y < image_size.y) {
+        uint pixel = input_pixels[global_id];
+        // NOTE(review): assumes the buffer holds R, G, B, A bytes in memory order, so a
+        // little-endian load puts red in the low byte and alpha in the high byte. Confirm
+        // against the upload code.
+        vec4 nonlinear = vec4(
+            pixel & 0xFFu,
+            (pixel >> 8) & 0xFFu,
+            (pixel >> 16) & 0xFFu,
+            (pixel >> 24) & 0xFFu
+        ) / 255.0;
+        // Alpha is not gamma encoded, so only the color channels are converted.
+        vec4 linear = vec4(srgb_to_linear(nonlinear.rgb), nonlinear.a);
+        vec4 premultiplied_linear = vec4(linear.rgb * linear.a, linear.a);
+        vec4 premultiplied_nonlinear = vec4(linear_to_srgb(premultiplied_linear.rgb), linear.a);
+        // No free srgb with image store operations https://www.khronos.org/opengl/wiki/Image_Load_Store#Format_compatibility
+        imageStore(target_image, ivec2(src_pixel_pos + target_offset), premultiplied_nonlinear);
+    }
+}
diff --git a/voxygen/src/render/pipelines/ui.rs b/voxygen/src/render/pipelines/ui.rs
index a741020103..1bc3c7589a 100644
--- a/voxygen/src/render/pipelines/ui.rs
+++ b/voxygen/src/render/pipelines/ui.rs
@@ -394,3 +394,52 @@ pub fn create_tri(
         v([tri[2][0], tri[2][1]], [uv_tri[2][0], uv_tri[2][1]]),
     )
 }
+
+// Steps:
+// 1. Upload new image via `Device::create_buffer_init`, with `MAP_WRITE` flag
+//    to avoid staging buffer.
+// 2. Run compute pipeline to multiply by alpha reading from this buffer and
+//    writing to the final texture (this may be in an atlas or an independent
+//    texture if the image is over a certain size threshold).
+//
+// Info needed in compute shader:
+// * source buffer
+// * target texture
+// * image dimensions
+// * position in the target texture
+// (what is the overhead of compute call? at some point we may be better off
+// converting small images on the cpu)
+
+/// Compute pipeline used to premultiply image colors by alpha (see the steps above).
+pub struct PremultiplyAlphaPipeline {
+    /// The underlying wgpu compute pipeline.
+    pub pipeline: wgpu::ComputePipeline,
+}
+
+impl PremultiplyAlphaPipeline {
+    /// Creates the compute pipeline from the shader `module`, with `layout` as its only
+    /// bind group layout, no push constants and `main` as the entry point.
+    pub fn new(
+        device: &wgpu::Device,
+        module: &wgpu::ShaderModule,
+        layout: &PremultiplAlphaLayout,
+    ) -> Self {
+        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: Some("Premultiply alpha pipeline layout"),
+            push_constant_ranges: &[],
+            // NOTE(review): `PremultiplAlphaLayout` is not defined in this patch (and looks
+            // like a misspelling of `PremultiplyAlphaLayout`). This slice needs
+            // `&wgpu::BindGroupLayout` items, so confirm that `layout` coerces to one.
+            bind_group_layouts: &[layout],
+        });
+
+        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+            label: Some("Premultiply alpha pipeline"),
+            layout: Some(&pipeline_layout),
+            module,
+            entry_point: "main",
+        });
+
+        Self { pipeline }
+    }
+}