diff --git a/assets/voxygen/shaders/premultiply-alpha-compute.glsl b/assets/voxygen/shaders/premultiply-alpha-compute.glsl
new file mode 100644
index 0000000000..36d11c2f30
--- /dev/null
+++ b/assets/voxygen/shaders/premultiply-alpha-compute.glsl
@@ -0,0 +1,36 @@
+#version 420 core
+
+// TODO: should we modify this based on the current device?
+// TODO: would it be better to have 2D workgroup for writing to a local area in the target image?
+layout(local_size_x = 256) in; // 1D workgroups; `main` handles one input pixel per invocation.
+
+// TODO: writing all images into a single buffer?
+layout(set = 0, binding = 0) readonly buffer InputImage {
+    uint input_pixels[]; // One 32-bit word per pixel; `main` indexes it with the flat invocation id.
+};
+
+layout (std140, set = 0, binding = 1)
+uniform u_locals {
+    // Size of the input image in pixels: x = row width, y = number of rows.
+    uvec2 image_size;
+    // Offset (in pixels) added to each source position to get the texel
+    // written in the target image.
+    uvec2 target_offset;
+};
+
+layout(rgba8, set = 0, binding = 2) uniform writeonly image2D target_image; // Destination; written via imageStore in `main`.
+
+void main() {
+    uint global_id = gl_GlobalInvocationID.x;
+    uvec2 src_pixel_pos = uvec2(global_id % image_size.x, global_id / image_size.x);
+    // Otherwise this is an out of bounds compute instance (x is always in range because of the modulo).
+    if (src_pixel_pos.y < image_size.y) {
+        uint pixel = input_pixels[global_id];
+        // NOTE(review): assumes the first channel is the most significant byte of the word -- confirm with the uploader.
+        vec4 nonlinear = vec4((pixel >> 24) & 0xFFu, (pixel >> 16) & 0xFFu, (pixel >> 8) & 0xFFu, pixel & 0xFFu);
+        // TODO: sRGB decode, multiply by alpha and re-encode are not implemented yet; these are still placeholders.
+        vec4 linear, premultiplied_linear, premultiplied_nonlinear;
+        // No free srgb with image store operations https://www.khronos.org/opengl/wiki/Image_Load_Store#Format_compatibility
+        imageStore(target_image, ivec2(src_pixel_pos + target_offset), premultiplied_nonlinear);
+    }
+}
diff --git a/voxygen/src/render/pipelines/ui.rs b/voxygen/src/render/pipelines/ui.rs
index a741020103..1bc3c7589a 100644
--- a/voxygen/src/render/pipelines/ui.rs
+++ b/voxygen/src/render/pipelines/ui.rs
@@ -394,3 +394,44 @@ pub fn create_tri(
         v([tri[2][0], tri[2][1]], [uv_tri[2][0], uv_tri[2][1]]),
     )
 }
+
+// Steps:
+// 1. Upload new image via `Device::create_buffer_init`, with `MAP_WRITE` flag
+//    to avoid staging buffer.
+// 2. Run compute pipeline to multiply by alpha reading from this buffer and
+//    writing to the final texture (this may be in an atlas or an independent
+//    texture if the image is over a certain size threshold).
+//
+// Info needed in compute shader: source buffer, target texture, image
+// dimensions, and position in the target texture.
+// (what is the overhead of compute call? at some point we may be better off
+// converting small images on the cpu)
+/// Wrapper around the compute pipeline used for the premultiply-alpha pass.
+pub struct PremultiplyAlphaPipeline {
+    pub pipeline: wgpu::ComputePipeline,
+}
+
+impl PremultiplyAlphaPipeline {
+    /// Creates the compute pipeline from `module` (entry point `main`), using
+    /// `layout` as the only bind group layout and no push constants.
+    pub fn new(
+        device: &wgpu::Device,
+        module: &wgpu::ShaderModule,
+        layout: &PremultiplAlphaLayout,
+    ) -> Self {
+        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: Some("Premultiply alpha pipeline layout"),
+            push_constant_ranges: &[],
+            bind_group_layouts: &[layout],
+        });
+
+        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+            label: Some("Premultiply alpha pipeline"),
+            layout: Some(&pipeline_layout),
+            module,
+            entry_point: "main",
+        });
+
+        Self { pipeline }
+    }
+}