Merge branch 'imbris/pixel-perfection-v2' into 'master'

Move image scaling into the UI shaders rather than precomputing it on the CPU. Closes #257. See merge request veloren/veloren!3573
2024-08-30 18:12:32 +00:00 · 2023-04-08 07:06:19 +00:00 · 2023-04-08 07:06:19 +00:00 · 54c39c03f7
commit 54c39c03f7
parent 162509e1c9 5881e44e61
28 changed files with 1600 additions and 265 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -32,6 +32,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Climbing no longer requires having 10 energy
 - Castles will now be placed close to towns
 - Sword
+- Rescaling of images for the UI is now done when sampling from them on the GPU. Improvements are
+  particularly noticeable when opening the map screen (which involves rescaling a few large
+  images) and also when using the voxel minimap view (where a medium size image is updated often). 

 ### Removed

--- a/assets/voxygen/element/ui/generic/frames/banner_bot.png
+++ b/assets/voxygen/element/ui/generic/frames/banner_bot.png
--- a/assets/voxygen/element/ui/generic/frames/esc_menu.png
+++ b/assets/voxygen/element/ui/generic/frames/esc_menu.png
--- a/assets/voxygen/shaders/premultiply-alpha-frag.glsl
+++ b/assets/voxygen/shaders/premultiply-alpha-frag.glsl
@ -0,0 +1,17 @@
+#version 420 core
+#extension GL_EXT_samplerless_texture_functions : enable
+
+layout(set = 0, binding = 0)
+uniform texture2D source_texture;
+
+layout(location = 0) in vec2 source_coords;
+
+layout(location = 0) out vec4 target_color;
+
+void main() {
+    // We get free nonlinear -> linear conversion when sampling from srgb texture.
+    vec4 linear = texelFetch(source_texture, ivec2(source_coords), 0);
+    vec4 premultiplied_linear = vec4(linear.rgb * linear.a, linear.a);
+    // We get free linear -> nonlinear conversion rendering to srgb texture.
+    target_color = premultiplied_linear;
+}
--- a/assets/voxygen/shaders/premultiply-alpha-vert.glsl
+++ b/assets/voxygen/shaders/premultiply-alpha-vert.glsl
@ -0,0 +1,48 @@
+#version 420 core
+
+layout(push_constant) uniform Params {
+    // Size of the source image.
+    uint source_size_xy;
+    // Offset to place the image at in the target texture.
+    //
+    // Origin is the top-left.
+    uint target_offset_xy;
+    // Size of the target texture.
+    uint target_size_xy;
+};
+
+layout(location = 0) out vec2 source_coords;
+
+uvec2 unpack(uint xy) {
+    return uvec2(
+        bitfieldExtract(xy,  0, 16),
+        bitfieldExtract(xy, 16, 16)
+    );
+}
+
+void main() {
+    vec2 source_size = vec2(unpack(source_size_xy));
+    vec2 target_offset = vec2(unpack(target_offset_xy));
+    vec2 target_size = vec2(unpack(target_size_xy));
+
+    // Generate rectangle (counter clockwise triangles)
+    //
+    // 0 0 1 1 1 0
+    float x_select = float(((uint(gl_VertexIndex) + 1u) / 3u) % 2u);
+    // 1 0 0 0 1 1
+    float y_select = float(((uint(gl_VertexIndex) + 5u) / 3u) % 2u);
+
+    source_coords = vec2(
+        // left -> right (on screen)
+        mix(0.0, 1.0, x_select),
+        // bottom -> top (on screen)
+        mix(1.0, 0.0, y_select)
+    ) * source_size;
+
+    vec2 target_coords_normalized = (target_offset + source_coords) / target_size;
+
+    // Flip y and transform [0.0, 1.0] -> [-1.0, 1.0] to get NDC coordinates.
+    vec2 v_pos = ((target_coords_normalized * 2.0) - vec2(1.0)) * vec2(1.0, -1.0); 
+
+    gl_Position = vec4(v_pos, 0.0, 1.0);
+}
--- a/assets/voxygen/shaders/ui-frag.glsl
+++ b/assets/voxygen/shaders/ui-frag.glsl
@ -1,10 +1,12 @@
 #version 420 core

 #include <globals.glsl>
+#include <constants.glsl>

 layout(location = 0) in vec2 f_uv;
 layout(location = 1) in vec4 f_color;
-layout(location = 2) flat in uint f_mode;
+layout(location = 2) flat in vec2 f_scale;
+layout(location = 3) flat in uint f_mode;

 layout (std140, set = 1, binding = 0)
 uniform u_locals {
@ -15,17 +17,197 @@ layout(set = 2, binding = 0)
 uniform texture2D t_tex;
 layout(set = 2, binding = 1)
 uniform sampler s_tex;
+layout (std140, set = 2, binding = 2)
+uniform tex_locals {
+    uvec2 texture_size;
+};

 layout(location = 0) out vec4 tgt_color;

+// Adjusts the provided uv value to account for coverage of pixels from the
+// sampled texture by the current fragment when upscaling.
+//
+// * `pos` - Position in the sampled texture in pixel coordinates. This is
+//   where the center of the current fragment lies on the sampled texture.
+// * `scale` - Scaling of pixels from the sampled texture to the render target.
+//   This is the amount of fragments that each pixel from the sampled texture
+//   covers.
+float upscale_adjust(float pos, float scale) {
+    // To retain crisp borders of upscaled pixel art, images are upscaled
+    // following the algorithm outlined here:
+    //
+    // https://csantosbh.wordpress.com/2014/01/25/manual-texture-filtering-for-pixelated-games-in-webgl/
+    // 
+    // `min(x * scale, 0.5) + max((x - 1.0) * scale + 0.5, 0.0)`
+    //
+    float frac = fract(pos);
+    // Lower edge (floor) of the pixel in the sampled texture that contains `pos`.
+    float base = floor(pos);
+    // This will be 0.5 when the current fragment lies entirely inside a pixel
+    // in the sampled texture.
+    float adjustment = min(frac * scale, 0.5) + max((frac - 1.0) * scale + 0.5, 0.0);
+    return base + adjustment;
+}
+
+// Computes info needed for downscaling using two samples in a single
+// dimension. This info includes the two positions to sample at (called
+// `offsets` even though they aren't actually offsets from the supplied
+// position) and the `weights` to multiply each of those samples by before
+// combining them.
+//
+// See `upscale_adjust` for semantics of `pos` and `scale` parameters.
+//
+// Output via `weights` and `offsets` parameters.
+void downscale_params(float pos, float scale, out vec2 weights, out vec2 offsets) {
+    // For `scale` 0.33333..1.0 we round to the nearest pixel edge and split
+    // there. We compute the length of each side. Then the sampling point is
+    // computed as this distance from the split point via this formula where
+    // `l` is the length of that side of split:
+    //
+    // `1.5 - (1.0 / max(l, 1.0))`
+    //
+    // For `scale` ..0.3333 the current fragment can potentially span more than
+    // 4 pixels (within a single dimension) in the sampled texture. So we can't
+    // perfectly compute the contribution of each covered pixel in the sampled
+    // texture with only 2 samples (along each dimension). Thus, we fallback to
+    // an imperfect technique of just sampling 1 pixel length from the center
+    // on each side of the nearest pixel edge. An alternative might be to
+    // pre-compute mipmap levels that could be sampled from, although this
+    // could interact poorly with the atlas.
+    if (scale > (1.0 / 3.0)) {
+        // Width of the fragment in terms of pixels in the sampled texture.
+        float width = 1.0 / scale;
+        // Lower-coordinate edge of the fragment in the sampled texture (`right` = lower side here).
+        float right = pos - width / 2.0;
+        float split = round(pos);
+        float right_len = split - right;
+        float left_len = width - right_len;
+        float right_sample_offset = 1.5 - (1.0 / max(right_len, 1.0));
+        float left_sample_offset = 1.5 - (1.0 / max(left_len, 1.0));
+        offsets = vec2(split) + vec2(-right_sample_offset, left_sample_offset);
+        weights = vec2(right_len, left_len) / width;
+    } else {
+        offsets = round(pos) + vec2(-1.0, 1.0);
+        // We split in the middle so weights for both sides are the same.
+        weights = vec2(0.5);
+    }
+}
+
+// 1 sample
+vec4 upscale_xy(vec2 uv_pixel, vec2 scale) {
+    // When slowly panning something (e.g. the map), a very small amount of
+    // wobbling is still observable (not as much as nearest sampling). It
+    // is possible to eliminate this by making the edges slightly blurry by
+    // lowering the scale a bit here. However, this does make edges a little
+    // less crisp and can cause bleeding in from other images packed into
+    // the atlas in the current setup.
+    vec2 adjusted = vec2(upscale_adjust(uv_pixel.x, scale.x), upscale_adjust(uv_pixel.y, scale.y));
+    // Convert back to 0.0..1.0 by dividing by texture size.
+    vec2 uv = adjusted / texture_size;
+    return textureLod(sampler2D(t_tex, s_tex), uv, 0);
+}
+
+// 2 samples
+vec4 upscale_x_downscale_y(vec2 uv_pixel, vec2 scale) {
+    float x_adjusted = upscale_adjust(uv_pixel.x, scale.x);
+    vec2 weights, offsets;
+    downscale_params(uv_pixel.y, scale.y, weights, offsets);
+    vec2 uv0 = vec2(x_adjusted, offsets[0]) / texture_size;
+    vec2 uv1 = vec2(x_adjusted, offsets[1]) / texture_size;
+    vec4 s0 = textureLod(sampler2D(t_tex, s_tex), uv0, 0);
+    vec4 s1 = textureLod(sampler2D(t_tex, s_tex), uv1, 0);
+    return s0 * weights[0] + s1 * weights[1];
+}
+
+// 2 samples
+vec4 downscale_x_upscale_y(vec2 uv_pixel, vec2 scale) {
+    float y_adjusted = upscale_adjust(uv_pixel.y, scale.y);
+    vec2 weights, offsets;
+    downscale_params(uv_pixel.x, scale.x, weights, offsets);
+    vec2 uv0 = vec2(offsets[0], y_adjusted) / texture_size;
+    vec2 uv1 = vec2(offsets[1], y_adjusted) / texture_size;
+    vec4 s0 = textureLod(sampler2D(t_tex, s_tex), uv0, 0);
+    vec4 s1 = textureLod(sampler2D(t_tex, s_tex), uv1, 0);
+    return s0 * weights[0] + s1 * weights[1];
+}
+
+// 4 samples
+vec4 downscale_xy(vec2 uv_pixel, vec2 scale) {
+    vec2 weights_x, offsets_x, weights_y, offsets_y;
+    downscale_params(uv_pixel.x, scale.x, weights_x, offsets_x);
+    downscale_params(uv_pixel.y, scale.y, weights_y, offsets_y);
+    vec2 uv0 = vec2(offsets_x[0], offsets_y[0]) / texture_size;
+    vec2 uv1 = vec2(offsets_x[1], offsets_y[0]) / texture_size;
+    vec2 uv2 = vec2(offsets_x[0], offsets_y[1]) / texture_size;
+    vec2 uv3 = vec2(offsets_x[1], offsets_y[1]) / texture_size;
+    vec4 s0 = textureLod(sampler2D(t_tex, s_tex), uv0, 0);
+    vec4 s1 = textureLod(sampler2D(t_tex, s_tex), uv1, 0);
+    vec4 s2 = textureLod(sampler2D(t_tex, s_tex), uv2, 0);
+    vec4 s3 = textureLod(sampler2D(t_tex, s_tex), uv3, 0);
+    vec4 s01 = s0 * weights_x[0] + s1 * weights_x[1];
+    vec4 s23 = s2 * weights_x[0] + s3 * weights_x[1];
+    // Useful to visualize things below the limit where downscaling is supposed
+    // to be perfectly accurate.
+    /*if (scale.x < (1.0 / 3.0)) {
+        return vec4(1, 0, 0, 1);
+    }*/
+    return s01 * weights_y[0] + s23 * weights_y[1];
+}
+
 void main() {
    // Text
    if (f_mode == uint(0)) {
-        tgt_color = f_color * vec4(1.0, 1.0, 1.0, textureLod(sampler2D(t_tex, s_tex), f_uv, 0).a);
+        // NOTE: This now uses a linear filter since `Texture::new_dynamic`
+        // was changed to use this by default. Glyphs are usually rasterized to be
+        // pretty close to the target size (so the filter change may have no
+        // effect), but there are thresholds within which the same rasterized
+        // glyph will be re-used. I wasn't able to observe any differences.
+        vec2 uv = f_uv;
+        #ifdef EXPERIMENTAL_UINEARESTSCALING
+            uv = (floor(uv * texture_size) + 0.5) / texture_size;
+        #endif
+        tgt_color = f_color * vec4(1.0, 1.0, 1.0, textureLod(sampler2D(t_tex, s_tex), uv, 0).a);
    // Image
    // HACK: bit 0 is set for both ordinary and north-facing images.
    } else if ((f_mode & uint(1)) == uint(1)) {
-        tgt_color = f_color * textureLod(sampler2D(t_tex, s_tex), f_uv, 0);
+        // NOTE: We don't have to account for bleeding over the border of an image
+        // due to how the ui currently handles rendering images. Currently, any
+        // edges of an image being rendered that don't line up with a pixel are
+        // snapped to a pixel, so we will never render any pixels containing an
+        // image that lie partly outside that image (and thus the sampling here
+        // will never try to sample outside an image). So we don't have to
+        // worry about bleeding in the atlas and/or what the border behavior
+        // should be.
+
+        // Convert to sampled pixel coordinates.
+        vec2 uv_pixel = f_uv * texture_size;
+        vec4 image_color;
+        #ifdef EXPERIMENTAL_UINEARESTSCALING
+            vec2 uv = (floor(uv_pixel) + 0.5) / texture_size;
+            image_color = textureLod(sampler2D(t_tex, s_tex), uv, 0);
+        #else 
+            if (f_scale.x >= 1.0) {
+                if (f_scale.y >= 1.0) {
+                    image_color = upscale_xy(uv_pixel, f_scale);     
+                } else {
+                    image_color = upscale_x_downscale_y(uv_pixel, f_scale);     
+                }
+            } else {
+                if (f_scale.y >= 1.0) {
+                    image_color = downscale_x_upscale_y(uv_pixel, f_scale);     
+                } else {
+                    image_color = downscale_xy(uv_pixel, f_scale);     
+                }
+            }
+        #endif
+
+        // un-premultiply alpha (linear filtering above requires alpha to be
+        // pre-multiplied)
+        if (image_color.a > 0.001) {
+            image_color.rgb /= image_color.a;
+        } 
+
+        tgt_color = f_color * image_color;
    // 2D Geometry
    } else if (f_mode == uint(2)) {
        tgt_color = f_color;
--- a/assets/voxygen/shaders/ui-vert.glsl
+++ b/assets/voxygen/shaders/ui-vert.glsl
@ -6,7 +6,8 @@ layout(location = 0) in vec2 v_pos;
 layout(location = 1) in vec2 v_uv;
 layout(location = 2) in vec4 v_color;
 layout(location = 3) in vec2 v_center;
-layout(location = 4) in uint v_mode;
+layout(location = 4) in vec2 v_scale;
+layout(location = 5) in uint v_mode;

 layout (std140, set = 1, binding = 0)
 uniform u_locals {
@ -17,10 +18,15 @@ layout(set = 2, binding = 0)
 uniform texture2D t_tex;
 layout(set = 2, binding = 1)
 uniform sampler s_tex;
+layout (std140, set = 2, binding = 2)
+uniform tex_locals {
+    uvec2 texture_size;
+};

 layout(location = 0) out vec2 f_uv;
 layout(location = 1) out vec4 f_color;
-layout(location = 2) flat out uint f_mode;
+layout(location = 2) flat out vec2 f_scale;
+layout(location = 3) flat out uint f_mode;

 void main() {
    f_color = v_color;
@ -39,7 +45,7 @@ void main() {
        gl_Position = vec4(v_pos, 0.5, 1.0);
        vec2 look_at_dir = normalize(vec2(-view_mat[0][2], -view_mat[1][2]));
        // TODO: Consider cleaning up matrix to something more efficient (e.g. a mat3).
-        vec2 aspect_ratio = textureSize(sampler2D(t_tex, s_tex), 0).yx;
+        vec2 aspect_ratio = texture_size.yx;
        mat2 look_at = mat2(look_at_dir.y, look_at_dir.x, -look_at_dir.x, look_at_dir.y);
        vec2 v_centered = (v_uv - v_center) / aspect_ratio;
        vec2 v_rotated = look_at * v_centered;
@ -60,5 +66,6 @@ void main() {
        gl_Position = vec4(v_pos, 0.5, 1.0);
    }

+    f_scale = v_scale;
    f_mode = v_mode;
 }
--- a/voxygen/src/hud/mod.rs
+++ b/voxygen/src/hud/mod.rs
@ -2782,11 +2782,13 @@ impl Hud {
                }

                for (i, timing) in gpu_timings.iter().enumerate() {
-                    let timings_text = &format!(
-                        "{:16}{:.3} ms",
-                        &format!("{}:", timing.1),
-                        timing.2 * 1000.0,
-                    );
+                    let label = timing.1;
+                    // We skip displaying these since they aren't present every frame.
+                    if label.starts_with(crate::render::UI_PREMULTIPLY_PASS) {
+                        continue;
+                    }
+                    let timings_text =
+                        &format!("{:16}{:.3} ms", &format!("{label}:"), timing.2 * 1000.0,);
                    let timings_widget = Text::new(timings_text)
                        .color(TEXT_COLOR)
                        .down(V_PAD)
--- a/voxygen/src/lib.rs
+++ b/voxygen/src/lib.rs
@ -155,7 +155,7 @@ pub trait PlayState {
    fn globals_bind_group(&self) -> &GlobalsBindGroup;

    /// Draw the play state.
-    fn render<'a>(&'a self, drawer: &mut Drawer<'a>, settings: &Settings);
+    fn render(&self, drawer: &mut Drawer<'_>, settings: &Settings);

    /// Determines whether egui will be rendered for this play state
    fn egui_enabled(&self) -> bool;
--- a/voxygen/src/menu/char_selection/mod.rs
+++ b/voxygen/src/menu/char_selection/mod.rs
@ -275,7 +275,7 @@ impl PlayState for CharSelectionState {

    fn globals_bind_group(&self) -> &GlobalsBindGroup { self.scene.global_bind_group() }

-    fn render<'a>(&'a self, drawer: &mut Drawer<'a>, _: &Settings) {
+    fn render(&self, drawer: &mut Drawer<'_>, _: &Settings) {
        let client = self.client.borrow();
        let (humanoid_body, loadout) =
            Self::get_humanoid_body_inventory(&self.char_selection_ui, &client);
--- a/voxygen/src/menu/main/mod.rs
+++ b/voxygen/src/menu/main/mod.rs
@ -394,7 +394,7 @@ impl PlayState for MainMenuState {

    fn globals_bind_group(&self) -> &GlobalsBindGroup { self.scene.global_bind_group() }

-    fn render<'a>(&'a self, drawer: &mut Drawer<'a>, _: &Settings) {
+    fn render(&self, drawer: &mut Drawer<'_>, _: &Settings) {
        // Draw the UI to the screen.
        let mut third_pass = drawer.third_pass();
        if let Some(mut ui_drawer) = third_pass.draw_ui() {
--- a/voxygen/src/render/mod.rs
+++ b/voxygen/src/render/mod.rs
@ -43,7 +43,8 @@ pub use self::{
            create_quad as create_ui_quad,
            create_quad_vert_gradient as create_ui_quad_vert_gradient, create_tri as create_ui_tri,
            BoundLocals as UiBoundLocals, Locals as UiLocals, Mode as UiMode,
-            TextureBindGroup as UiTextureBindGroup, Vertex as UiVertex,
+            TextureBindGroup as UiTextureBindGroup, UploadBatchId as UiUploadBatchId,
+            Vertex as UiVertex,
        },
        GlobalModel, Globals, GlobalsBindGroup, GlobalsLayouts, Light, Shadow,
    },
@ -52,7 +53,7 @@ pub use self::{
            DebugDrawer, DebugShadowDrawer, Drawer, FigureDrawer, FigureShadowDrawer,
            FirstPassDrawer, ParticleDrawer, PreparedUiDrawer, ShadowPassDrawer, SpriteDrawer,
            TerrainDrawer, TerrainShadowDrawer, ThirdPassDrawer, TrailDrawer,
-            TransparentPassDrawer, UiDrawer, VolumetricPassDrawer,
+            TransparentPassDrawer, UiDrawer, VolumetricPassDrawer, UI_PREMULTIPLY_PASS,
        },
        AltIndices, ColLightInfo, CullingMode, Renderer,
    },
@ -536,4 +537,7 @@ pub enum ExperimentalShader {
    SmearReflections,
    /// Apply the point shadows from cheap shadows on top of shadow mapping.
    PointShadowsWithShadowMapping,
+    /// Make the UI use nearest neighbor filtering for scaling images instead
+    /// of trying to filter based on the coverage of the sampled pixels.
+    UiNearestScaling,
 }
--- a/voxygen/src/render/pipelines/ui.rs
+++ b/voxygen/src/render/pipelines/ui.rs
@ -1,8 +1,19 @@
 use super::super::{Bound, Consts, GlobalsLayouts, Quad, Texture, Tri, Vertex as VertexTrait};
 use bytemuck::{Pod, Zeroable};
+use core::num::NonZeroU32;
 use std::mem;
 use vek::*;

+/// The format of textures that the UI sources image data from.
+///
+/// Note, this is not directly used in all relevant locations, but still helps to
+/// more clearly document that this is the format being used. Notably,
+/// textures are created via `renderer.create_dynamic_texture(...)` and
+/// `renderer.create_texture(&DynamicImage::ImageRgba(image), ...)` (TODO:
+/// update if we have to refactor when implementing the RENDER_ATTACHMENT
+/// usage).
+const UI_IMAGE_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8UnormSrgb;
+
 #[repr(C)]
 #[derive(Copy, Clone, Debug, Zeroable, Pod)]
 pub struct Vertex {
@ -10,12 +21,17 @@ pub struct Vertex {
    uv: [f32; 2],
    color: [f32; 4],
    center: [f32; 2],
+    // Used for calculating where to sample scaled images.
+    scale: [f32; 2],
    mode: u32,
 }

 impl Vertex {
    fn desc<'a>() -> wgpu::VertexBufferLayout<'a> {
-        const ATTRIBUTES: [wgpu::VertexAttribute; 5] = wgpu::vertex_attr_array![0 => Float32x2, 1 => Float32x2, 2 => Float32x4, 3 => Float32x2, 4 => Uint32];
+        const ATTRIBUTES: [wgpu::VertexAttribute; 6] = wgpu::vertex_attr_array![
+            0 => Float32x2, 1 => Float32x2, 2 => Float32x4,
+            3 => Float32x2, 4 => Float32x2,    5 => Uint32,
+        ];
        wgpu::VertexBufferLayout {
            array_stride: Self::STRIDE,
            step_mode: wgpu::InputStepMode::Vertex,
@ -47,6 +63,20 @@ impl Default for Locals {
    fn default() -> Self { Self { pos: [0.0; 4] } }
 }

+#[repr(C)]
+#[derive(Copy, Clone, Debug, Zeroable, Pod)]
+pub struct TexLocals {
+    texture_size: [u32; 2],
+}
+
+impl From<Vec2<u32>> for TexLocals {
+    fn from(texture_size: Vec2<u32>) -> Self {
+        Self {
+            texture_size: texture_size.into_array(),
+        }
+    }
+}
+
 /// Draw text from the text cache texture `tex` in the fragment shader.
 pub const MODE_TEXT: u32 = 0;
 /// Draw an image from the texture at `tex` in the fragment shader.
@ -64,22 +94,44 @@ pub const MODE_IMAGE_SOURCE_NORTH: u32 = 3;
 /// FIXME: Make more principled.
 pub const MODE_IMAGE_TARGET_NORTH: u32 = 5;

+#[derive(Clone, Copy)]
 pub enum Mode {
    Text,
-    Image,
+    Image {
+        scale: Vec2<f32>,
+    },
    Geometry,
-    ImageSourceNorth,
-    ImageTargetNorth,
+    /// Draw an image from the texture at `tex` in the fragment shader, with the
+    /// source rectangle rotated to face north (TODO: detail on what "north"
+    /// means here).
+    ImageSourceNorth {
+        scale: Vec2<f32>,
+    },
+    /// Draw an image from the texture at `tex` in the fragment shader, with the
+    /// target rectangle rotated to face north. (TODO: detail on what "target"
+    /// means)
+    ImageTargetNorth {
+        scale: Vec2<f32>,
+    },
 }

 impl Mode {
    fn value(self) -> u32 {
        match self {
            Mode::Text => MODE_TEXT,
-            Mode::Image => MODE_IMAGE,
+            Mode::Image { .. } => MODE_IMAGE,
            Mode::Geometry => MODE_GEOMETRY,
-            Mode::ImageSourceNorth => MODE_IMAGE_SOURCE_NORTH,
-            Mode::ImageTargetNorth => MODE_IMAGE_TARGET_NORTH,
+            Mode::ImageSourceNorth { .. } => MODE_IMAGE_SOURCE_NORTH,
+            Mode::ImageTargetNorth { .. } => MODE_IMAGE_TARGET_NORTH,
+        }
+    }
+
+    /// Gets the scaling of the displayed image compared to the source.
+    fn scale(self) -> Vec2<f32> {
+        match self {
+            Mode::ImageSourceNorth { scale } | Mode::ImageTargetNorth { scale } => scale,
+            Mode::Image { scale } => scale,
+            Mode::Text | Mode::Geometry => Vec2::one(),
        }
    }
 }
@ -91,8 +143,8 @@ pub struct TextureBindGroup {
 }

 pub struct UiLayout {
-    pub locals: wgpu::BindGroupLayout,
-    pub texture: wgpu::BindGroupLayout,
+    locals: wgpu::BindGroupLayout,
+    texture: wgpu::BindGroupLayout,
 }

 impl UiLayout {
@ -137,6 +189,17 @@ impl UiLayout {
                        },
                        count: None,
                    },
+                    // tex_locals
+                    wgpu::BindGroupLayoutEntry {
+                        binding: 2,
+                        visibility: wgpu::ShaderStage::VERTEX | wgpu::ShaderStage::FRAGMENT,
+                        ty: wgpu::BindingType::Buffer {
+                            ty: wgpu::BufferBindingType::Uniform,
+                            has_dynamic_offset: false,
+                            min_binding_size: None,
+                        },
+                        count: None,
+                    },
                ],
            }),
        }
@ -158,7 +221,12 @@ impl UiLayout {
        }
    }

-    pub fn bind_texture(&self, device: &wgpu::Device, texture: &Texture) -> TextureBindGroup {
+    pub fn bind_texture(
+        &self,
+        device: &wgpu::Device,
+        texture: &Texture,
+        tex_locals: Consts<TexLocals>,
+    ) -> TextureBindGroup {
        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: None,
            layout: &self.texture,
@ -171,6 +239,10 @@ impl UiLayout {
                    binding: 1,
                    resource: wgpu::BindingResource::Sampler(&texture.sampler),
                },
+                wgpu::BindGroupEntry {
+                    binding: 2,
+                    resource: tex_locals.buf().as_entire_binding(),
+                },
            ],
        });

@ -268,17 +340,19 @@ pub fn create_quad_vert_gradient(
    let top_color = top_color.into_array();
    let bottom_color = bottom_color.into_array();

-    let center = if let Mode::ImageSourceNorth = mode {
+    let center = if let Mode::ImageSourceNorth { .. } = mode {
        uv_rect.center().into_array()
    } else {
        rect.center().into_array()
    };
+    let scale = mode.scale().into_array();
    let mode_val = mode.value();
    let v = |pos, uv, color| Vertex {
        pos,
        uv,
        center,
        color,
+        scale,
        mode: mode_val,
    };
    let aabr_to_lbrt = |aabr: Aabr<f32>| (aabr.min.x, aabr.min.y, aabr.max.x, aabr.max.y);
@ -315,12 +389,14 @@ pub fn create_tri(
    mode: Mode,
 ) -> Tri<Vertex> {
    let center = [0.0, 0.0];
+    let scale = mode.scale().into_array();
    let mode_val = mode.value();
    let v = |pos, uv| Vertex {
        pos,
        uv,
        center,
        color: color.into_array(),
+        scale,
        mode: mode_val,
    };
    Tri::new(
@ -329,3 +405,298 @@ pub fn create_tri(
        v([tri[2][0], tri[2][1]], [uv_tri[2][0], uv_tri[2][1]]),
    )
 }
+
+// Premultiplying alpha on the GPU before placing images into the textures that
+// will be sampled from in the UI pipeline.
+//
+// Steps:
+//
+// 1. Upload new image via `Device::create_texture_with_data`.
+//
+//    (NOTE: Initially considered: Creating a storage buffer to read from in the
+//    shader via `Device::create_buffer_init`, with `MAP_WRITE` flag to avoid
+//    staging buffer. However, with GPUs combining usages other than `COPY_SRC`
+//    with `MAP_WRITE` may be less ideal. Plus, by copying into a texture first
+//    we can get free srgb conversion when fetching colors from the texture. In
+//    the future, we may want to branch based on whether the GPU is
+//    integrated and avoid this extra copy.)
+//
+// 2. Run render pipeline to multiply by alpha reading from this texture and
+//    writing to the final texture (this can either be in an atlas or in an
+//    independent texture if the image is over a certain size threshold).
+//
+//    (NOTE: Initially considered: using a compute pipeline and writing to the
+//     final texture as a storage texture. However, the srgb format can't be
+//     used with storage texture and there is not yet the capability to create
+//     non-srgb views of srgb textures.)
+//
+// Info needed:
+//
+// * source texture (texture binding)
+// * target texture (render attachment)
+// * source image dimensions (push constant)
+// * target texture dimensions (push constant)
+// * position in the target texture (push constant)
+//
+// TODO: potential optimizations
+// * what is the overhead of this draw call? at some point we may be better
+//   off converting very small images on the cpu and/or batching these into a
+//   single draw call
+// * what is the overhead of creating new small textures? for processing many
+//   small images would it be useful to create a single texture the same size as
+//   our cache texture and use Queue::write_texture?
+// * is using create_buffer_init and reading directly from that (with manual
+//   srgb conversion) worth avoiding staging buffer/copy-to-texture for
+//   integrated GPUs?
+// * premultiplying alpha in a release asset preparation step
+
+pub struct PremultiplyAlphaLayout {
+    source_texture: wgpu::BindGroupLayout,
+}
+
+impl PremultiplyAlphaLayout {
+    pub fn new(device: &wgpu::Device) -> Self {
+        Self {
+            source_texture: device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+                label: None,
+                entries: &[
+                    // source_texture
+                    wgpu::BindGroupLayoutEntry {
+                        binding: 0,
+                        visibility: wgpu::ShaderStage::FRAGMENT,
+                        ty: wgpu::BindingType::Texture {
+                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
+                            view_dimension: wgpu::TextureViewDimension::D2,
+                            multisampled: false,
+                        },
+                        count: None,
+                    },
+                ],
+            }),
+        }
+    }
+}
+
+pub struct PremultiplyAlphaPipeline {
+    pub pipeline: wgpu::RenderPipeline,
+}
+
+impl PremultiplyAlphaPipeline {
+    pub fn new(
+        device: &wgpu::Device,
+        vs_module: &wgpu::ShaderModule,
+        fs_module: &wgpu::ShaderModule,
+        layout: &PremultiplyAlphaLayout,
+    ) -> Self {
+        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: Some("Premultiply alpha pipeline layout"),
+            bind_group_layouts: &[&layout.source_texture],
+            push_constant_ranges: &[wgpu::PushConstantRange {
+                stages: wgpu::ShaderStage::VERTEX,
+                range: 0..core::mem::size_of::<PremultiplyAlphaParams>() as u32,
+            }],
+        });
+
+        let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
+            label: Some("Premultiply alpha pipeline"),
+            layout: Some(&pipeline_layout),
+            vertex: wgpu::VertexState {
+                module: vs_module,
+                entry_point: "main",
+                buffers: &[],
+            },
+            primitive: wgpu::PrimitiveState {
+                topology: wgpu::PrimitiveTopology::TriangleList,
+                strip_index_format: None,
+                front_face: wgpu::FrontFace::Ccw,
+                cull_mode: Some(wgpu::Face::Back),
+                clamp_depth: false,
+                polygon_mode: wgpu::PolygonMode::Fill,
+                conservative: false,
+            },
+            depth_stencil: None,
+            multisample: wgpu::MultisampleState::default(),
+            fragment: Some(wgpu::FragmentState {
+                module: fs_module,
+                entry_point: "main",
+                targets: &[wgpu::ColorTargetState {
+                    format: UI_IMAGE_FORMAT,
+                    blend: None,
+                    write_mask: wgpu::ColorWrite::ALL,
+                }],
+            }),
+        });
+
+        Self { pipeline }
+    }
+}
+
+/// Uploaded as push constant.
+#[repr(C)]
+#[derive(Copy, Clone, Debug, Zeroable, Pod)]
+pub struct PremultiplyAlphaParams {
+    /// Size of the source image.
+    source_size_xy: u32,
+    /// Offset to place the image at in the target texture.
+    ///
+    /// Origin is the top-left.
+    target_offset_xy: u32,
+    /// Size of the target texture.
+    target_size_xy: u32,
+}
+
+/// An image upload that needs alpha premultiplication and which is in a pending
+/// state.
+///
+/// From here we will use the `PremultiplyAlpha` pipeline to premultiply the
+/// alpha while transferring the image to its destination texture.
+pub(in super::super) struct PremultiplyUpload {
+    source_bg: wgpu::BindGroup,
+    source_size_xy: u32,
+    /// The location in the final texture this will be placed at. Technically,
+    /// we don't need this information at this point but it is convenient to
+    /// store it here.
+    offset: Vec2<u16>,
+}
+
+impl PremultiplyUpload {
+    /// Writes `image` to a new GPU texture and binds it as the source of a
+    /// premultiply draw that will land at `offset` in the target texture.
+    pub(in super::super) fn prepare(
+        device: &wgpu::Device,
+        queue: &wgpu::Queue,
+        layout: &PremultiplyAlphaLayout,
+        image: &image::RgbaImage,
+        offset: Vec2<u16>,
+    ) -> Self {
+        // TODO: this repeats code from `Texture` because we need no sampler and
+        // `Texture::new` accepts &DynamicImage, which can't be created from
+        // &RgbaImage without cloning (might be addressed on zoomy worldgen branch).
+        let (width, height) = (image.width(), image.height());
+        let extent = wgpu::Extent3d {
+            width,
+            height,
+            depth_or_array_layers: 1,
+        };
+        let texture = device.create_texture(&wgpu::TextureDescriptor {
+            label: None,
+            size: extent,
+            mip_level_count: 1,
+            sample_count: 1,
+            dimension: wgpu::TextureDimension::D2,
+            format: wgpu::TextureFormat::Rgba8UnormSrgb,
+            usage: wgpu::TextureUsage::SAMPLED | wgpu::TextureUsage::COPY_DST,
+        });
+        // Hand over exactly width * height RGBA pixels at 4 bytes each.
+        let pixel_bytes = &(&**image)[..(width as usize * height as usize * 4)];
+        let destination = wgpu::ImageCopyTexture {
+            texture: &texture,
+            mip_level: 0,
+            origin: wgpu::Origin3d::ZERO,
+        };
+        let data_layout = wgpu::ImageDataLayout {
+            offset: 0,
+            bytes_per_row: NonZeroU32::new(width * 4),
+            rows_per_image: NonZeroU32::new(height),
+        };
+        queue.write_texture(destination, pixel_bytes, data_layout, extent);
+        // The bind group is built from a view over the texture.
+        let source_view = texture.create_view(&wgpu::TextureViewDescriptor {
+            label: None,
+            format: Some(wgpu::TextureFormat::Rgba8UnormSrgb),
+            dimension: Some(wgpu::TextureViewDimension::D2),
+            aspect: wgpu::TextureAspect::All,
+            base_mip_level: 0,
+            mip_level_count: None,
+            base_array_layer: 0,
+            array_layer_count: None,
+        });
+        let texture_entry = wgpu::BindGroupEntry {
+            binding: 0,
+            resource: wgpu::BindingResource::TextureView(&source_view),
+        };
+        let source_bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: None,
+            layout: &layout.source_texture,
+            entries: &[texture_entry],
+        });
+
+        Self {
+            source_bg,
+            // NOTE: We assume the max texture size is less than u16::MAX.
+            source_size_xy: width + (height << 16),
+            offset,
+        }
+    }
+
+    /// Returns the source bind group and push constant parameters needed to
+    /// draw this upload into `target`. Semantically, this consumes the
+    /// `PremultiplyUpload` but we need to keep the bind group alive to the end
+    /// of the render pass and don't want to bother storing it somewhere else.
+    pub(in super::super) fn draw_data(
+        &self,
+        target: &Texture,
+    ) -> (&wgpu::BindGroup, PremultiplyAlphaParams) {
+        // NOTE: We assume the max texture size is less than u16::MAX.
+        let pack = |x: u32, y: u32| x + (y << 16);
+        let target_size = target.get_dimensions();
+        let params = PremultiplyAlphaParams {
+            source_size_xy: self.source_size_xy,
+            target_offset_xy: pack(u32::from(self.offset.x), u32::from(self.offset.y)),
+            target_size_xy: pack(target_size.x, target_size.y),
+        };
+        (&self.source_bg, params)
+    }
+}
+
+use std::sync::Arc;
+/// Pending premultiply uploads, batched per target texture.
+#[derive(Default)]
+pub(in super::super) struct BatchedUploads {
+    batches: Vec<(Arc<Texture>, Vec<PremultiplyUpload>)>, // (target, its uploads)
+}
+#[derive(Default, Clone, Copy)]
+pub struct UploadBatchId(usize); // index into `BatchedUploads::batches`
+
+impl BatchedUploads {
+    /// Queues `upload` in the batch for `target_texture` and returns the id of
+    /// the batch it ended up in.
+    ///
+    /// The batch named by `batch_id` is reused only when that id refers to an
+    /// existing batch whose texture is this very `Arc` (pointer equality).
+    /// Otherwise a new batch is appended, which clones the texture `Arc`.
+    ///
+    /// The returned id can be passed to subsequent calls to add further
+    /// uploads for the same texture to the same batch.
+    ///
+    /// Batch ids will reset every frame, however since we check that the
+    /// texture matches, it is perfectly fine to use a stale id (just keep
+    /// in mind that this will create a new batch). This also means that it is
+    /// sufficient to use `UploadBatchId::default()` when calling this with
+    /// new textures.
+    pub(in super::super) fn submit(
+        &mut self,
+        target_texture: &Arc<Texture>,
+        batch_id: UploadBatchId,
+        upload: PremultiplyUpload,
+    ) -> UploadBatchId {
+        let reusable = self
+            .batches
+            .get(batch_id.0)
+            .map_or(false, |(texture, _)| Arc::ptr_eq(texture, target_texture));
+        if !reusable {
+            // Index the new batch will occupy once pushed.
+            let appended_id = UploadBatchId(self.batches.len());
+            let batch = (Arc::clone(target_texture), vec![upload]);
+            self.batches.push(batch);
+            return appended_id;
+        }
+        self.batches[batch_id.0].1.push(upload);
+        batch_id
+    }
+
+    /// Removes and returns every pending batch, leaving this empty.
+    pub(in super::super) fn take(&mut self) -> Vec<(Arc<Texture>, Vec<PremultiplyUpload>)> {
+        core::mem::take(&mut self.batches)
+    }
+}
--- a/voxygen/src/render/renderer.rs
+++ b/voxygen/src/render/renderer.rs
@ -63,6 +63,7 @@ struct ImmutableLayouts {
    clouds: clouds::CloudsLayout,
    bloom: bloom::BloomLayout,
    ui: ui::UiLayout,
+    premultiply_alpha: ui::PremultiplyAlphaLayout,
    blit: blit::BlitLayout,
 }

@ -177,6 +178,8 @@ pub struct Renderer {
    profile_times: Vec<wgpu_profiler::GpuTimerScopeResult>,
    profiler_features_enabled: bool,

+    ui_premultiply_uploads: ui::BatchedUploads,
+
    #[cfg(feature = "egui-ui")]
    egui_renderpass: egui_wgpu_backend::RenderPass,

@ -393,6 +396,7 @@ impl Renderer {
                &pipeline_modes,
            ));
            let ui = ui::UiLayout::new(&device);
+            let premultiply_alpha = ui::PremultiplyAlphaLayout::new(&device);
            let blit = blit::BlitLayout::new(&device);

            let immutable = Arc::new(ImmutableLayouts {
@ -407,6 +411,7 @@ impl Renderer {
                clouds,
                bloom,
                ui,
+                premultiply_alpha,
                blit,
            });

@ -542,6 +547,8 @@ impl Renderer {
            profile_times: Vec::new(),
            profiler_features_enabled,

+            ui_premultiply_uploads: Default::default(),
+
            #[cfg(feature = "egui-ui")]
            egui_renderpass,

@ -1434,6 +1441,25 @@ impl Renderer {
        texture.update(&self.queue, offset, size, bytemuck::cast_slice(data))
    }

+    /// Uploads `image` to the GPU and queues it to be premultiplied into
+    /// `target_texture` at `offset`. The queued work is carried out by the
+    /// drawer's premultiply passes.
+    ///
+    /// The `batch` argument and the returned id are explained in the docs on
+    /// [`ui::BatchedUploads::submit`].
+    pub fn ui_premultiply_upload(
+        &mut self,
+        target_texture: &Arc<Texture>,
+        batch: ui::UploadBatchId,
+        image: &image::RgbaImage,
+        offset: Vec2<u16>,
+    ) -> ui::UploadBatchId {
+        let layout = &self.layouts.premultiply_alpha;
+        let pending =
+            ui::PremultiplyUpload::prepare(&self.device, &self.queue, layout, image, offset);
+        self.ui_premultiply_uploads.submit(target_texture, batch, pending)
+    }
+
    /// Queue to obtain a screenshot on the next frame render
    pub fn create_screenshot(
        &mut self,
--- a/voxygen/src/render/renderer/binding.rs
+++ b/voxygen/src/render/renderer/binding.rs
@ -47,8 +47,12 @@ impl Renderer {
        self.layouts.ui.bind_locals(&self.device, locals)
    }

-    pub fn ui_bind_texture(&self, texture: &Texture) -> ui::TextureBindGroup {
-        self.layouts.ui.bind_texture(&self.device, texture)
+    pub fn ui_bind_texture(&mut self, texture: &Texture) -> ui::TextureBindGroup {
+        let tex_locals = ui::TexLocals::from(texture.get_dimensions().xy());
+        let tex_locals_consts = self.create_consts(&[tex_locals]);
+        self.layouts
+            .ui
+            .bind_texture(&self.device, texture, tex_locals_consts)
    }

    pub fn create_figure_bound_locals(
--- a/voxygen/src/render/renderer/drawer.rs
+++ b/voxygen/src/render/renderer/drawer.rs
@ -12,6 +12,7 @@ use super::{
    rain_occlusion_map::{RainOcclusionMap, RainOcclusionMapRenderer},
    Renderer, ShadowMap, ShadowMapRenderer,
 };
+use common_base::prof_span;
 use core::{num::NonZeroU32, ops::Range};
 use std::sync::Arc;
 use vek::Aabr;
@ -19,6 +20,9 @@ use wgpu_profiler::scope::{ManualOwningScope, OwningScope, Scope};
 #[cfg(feature = "egui-ui")]
 use {common_base::span, egui_wgpu_backend::ScreenDescriptor, egui_winit_platform::Platform};

+/// GPU timing label prefix associated with the UI alpha premultiplication pass.
+pub const UI_PREMULTIPLY_PASS: &str = "ui_premultiply_pass";
+
 // Currently available pipelines
 enum Pipelines<'frame> {
    Interface(&'frame super::InterfacePipelines),
@ -36,6 +40,14 @@ impl<'frame> Pipelines<'frame> {
        }
    }

+    fn premultiply_alpha(&self) -> Option<&ui::PremultiplyAlphaPipeline> {
+        Some(match self {
+            Pipelines::Interface(interface) => &interface.premultiply_alpha,
+            Pipelines::All(all) => &all.premultiply_alpha,
+            Pipelines::None => return None, // no pipelines to borrow from
+        })
+    }
+
    fn blit(&self) -> Option<&blit::BlitPipeline> {
        match self {
            Pipelines::Interface(pipelines) => Some(&pipelines.blit),
@ -66,6 +78,7 @@ struct RendererBorrow<'frame> {
    pipeline_modes: &'frame super::PipelineModes,
    quad_index_buffer_u16: &'frame Buffer<u16>,
    quad_index_buffer_u32: &'frame Buffer<u32>,
+    ui_premultiply_uploads: &'frame mut ui::BatchedUploads,
    #[cfg(feature = "egui-ui")]
    egui_render_pass: &'frame mut egui_wgpu_backend::RenderPass,
 }
@ -117,6 +130,7 @@ impl<'frame> Drawer<'frame> {
            pipeline_modes: &renderer.pipeline_modes,
            quad_index_buffer_u16: &renderer.quad_index_buffer_u16,
            quad_index_buffer_u32: &renderer.quad_index_buffer_u32,
+            ui_premultiply_uploads: &mut renderer.ui_premultiply_uploads,
            #[cfg(feature = "egui-ui")]
            egui_render_pass: &mut renderer.egui_renderpass,
        };
@ -424,7 +438,49 @@ impl<'frame> Drawer<'frame> {
        });
    }

+    /// Drains pending uploads, running one alpha premultiplication render pass per
+    /// target texture. Without the pipeline, returns early and leaves them queued.
+    fn run_ui_premultiply_passes(&mut self) {
+        prof_span!("run_ui_premultiply_passes");
+        let Some(premultiply_alpha) = self.borrow.pipelines.premultiply_alpha() else { return };
+        let encoder = self.encoder.as_mut().unwrap();
+        let device = self.borrow.device;
+
+        let targets = self.borrow.ui_premultiply_uploads.take();
+
+        for (i, (target_texture, uploads)) in targets.into_iter().enumerate() {
+            prof_span!("ui premultiply pass");
+            let profile_name = format!("{UI_PREMULTIPLY_PASS} {i}");
+            let label = format!("ui premultiply pass {i}");
+            let mut render_pass =
+                encoder.scoped_render_pass(&profile_name, device, &wgpu::RenderPassDescriptor {
+                    label: Some(&label),
+                    color_attachments: &[wgpu::RenderPassColorAttachment {
+                        view: &target_texture.view,
+                        resolve_target: None,
+                        ops: wgpu::Operations {
+                            load: wgpu::LoadOp::Load, // keep what the target already holds
+                            store: true,
+                        },
+                    }],
+                    depth_stencil_attachment: None,
+                });
+            render_pass.set_pipeline(&premultiply_alpha.pipeline);
+            for upload in &uploads {
+                let (source_bind_group, push_constant_data) = upload.draw_data(&target_texture);
+                let bytes = bytemuck::bytes_of(&push_constant_data);
+                render_pass.set_bind_group(0, source_bind_group, &[]);
+                render_pass.set_push_constants(wgpu::ShaderStage::VERTEX, 0, bytes);
+                render_pass.draw(0..6, 0..1); // 6 vertices, 1 instance per upload
+            }
+        }
+    }
+
+    /// Note, this automatically calls the internal `run_ui_premultiply_passes`
+    /// to complete any pending image uploads for the UI.
    pub fn third_pass(&mut self) -> ThirdPassDrawer {
+        self.run_ui_premultiply_passes();
+
        let encoder = self.encoder.as_mut().unwrap();
        let device = self.borrow.device;
        let mut render_pass =
@ -498,7 +554,7 @@ impl<'frame> Drawer<'frame> {

    /// Does nothing if the shadow pipelines are not available or shadow map
    /// rendering is disabled
-    pub fn draw_point_shadows<'data: 'frame>(
+    pub fn draw_point_shadows<'data>(
        &mut self,
        matrices: &[shadow::PointLightMatrix; 126],
        chunks: impl Clone
--- a/voxygen/src/render/renderer/pipeline_creation.rs
+++ b/voxygen/src/render/renderer/pipeline_creation.rs
@ -33,6 +33,7 @@ pub struct Pipelines {
    pub lod_object: lod_object::LodObjectPipeline,
    pub terrain: terrain::TerrainPipeline,
    pub ui: ui::UiPipeline,
+    pub premultiply_alpha: ui::PremultiplyAlphaPipeline,
    pub blit: blit::BlitPipeline,
 }

@ -79,6 +80,7 @@ pub struct IngameAndShadowPipelines {
 /// Use to decouple interface pipeline creation when initializing the renderer
 pub struct InterfacePipelines {
    pub ui: ui::UiPipeline,
+    pub premultiply_alpha: ui::PremultiplyAlphaPipeline,
    pub blit: blit::BlitPipeline,
 }

@ -100,6 +102,7 @@ impl Pipelines {
            lod_object: ingame.lod_object,
            terrain: ingame.terrain,
            ui: interface.ui,
+            premultiply_alpha: interface.premultiply_alpha,
            blit: interface.blit,
        }
    }
@ -127,6 +130,8 @@ struct ShaderModules {
    trail_frag: wgpu::ShaderModule,
    ui_vert: wgpu::ShaderModule,
    ui_frag: wgpu::ShaderModule,
+    premultiply_alpha_vert: wgpu::ShaderModule,
+    premultiply_alpha_frag: wgpu::ShaderModule,
    lod_terrain_vert: wgpu::ShaderModule,
    lod_terrain_frag: wgpu::ShaderModule,
    clouds_vert: wgpu::ShaderModule,
@ -336,6 +341,8 @@ impl ShaderModules {
            trail_frag: create_shader("trail-frag", ShaderKind::Fragment)?,
            ui_vert: create_shader("ui-vert", ShaderKind::Vertex)?,
            ui_frag: create_shader("ui-frag", ShaderKind::Fragment)?,
+            premultiply_alpha_vert: create_shader("premultiply-alpha-vert", ShaderKind::Vertex)?,
+            premultiply_alpha_frag: create_shader("premultiply-alpha-frag", ShaderKind::Fragment)?,
            lod_terrain_vert: create_shader("lod-terrain-vert", ShaderKind::Vertex)?,
            lod_terrain_frag: create_shader("lod-terrain-frag", ShaderKind::Fragment)?,
            clouds_vert: create_shader("clouds-vert", ShaderKind::Vertex)?,
@ -416,11 +423,11 @@ struct PipelineNeeds<'a> {
 fn create_interface_pipelines(
    needs: PipelineNeeds,
    pool: &rayon::ThreadPool,
-    tasks: [Task; 2],
+    tasks: [Task; 3],
 ) -> InterfacePipelines {
    prof_span!(_guard, "create_interface_pipelines");

-    let [ui_task, blit_task] = tasks;
+    let [ui_task, premultiply_alpha_task, blit_task] = tasks;
    // Construct a pipeline for rendering UI elements
    let create_ui = || {
        ui_task.run(
@ -438,6 +445,20 @@ fn create_interface_pipelines(
        )
    };

+    let create_premultiply_alpha = || {
+        premultiply_alpha_task.run(
+            || {
+                ui::PremultiplyAlphaPipeline::new(
+                    needs.device,
+                    &needs.shaders.premultiply_alpha_vert,
+                    &needs.shaders.premultiply_alpha_frag,
+                    &needs.layouts.premultiply_alpha,
+                )
+            },
+            "premultiply alpha pipeline creation",
+        )
+    };
+
    // Construct a pipeline for blitting, used during screenshotting
    let create_blit = || {
        blit_task.run(
@ -454,9 +475,15 @@ fn create_interface_pipelines(
        )
    };

-    let (ui, blit) = pool.join(create_ui, create_blit);
+    let (ui, (premultiply_alpha, blit)) = pool.join(create_ui, || {
+        pool.join(create_premultiply_alpha, create_blit)
+    });

-    InterfacePipelines { ui, blit }
+    InterfacePipelines {
+        ui,
+        premultiply_alpha,
+        blit,
+    }
 }

 /// Create IngamePipelines and shadow pipelines in parallel
--- a/voxygen/src/render/renderer/shaders.rs
+++ b/voxygen/src/render/renderer/shaders.rs
@ -73,6 +73,8 @@ impl assets::Compound for Shaders {
            "trail-frag",
            "ui-vert",
            "ui-frag",
+            "premultiply-alpha-vert",
+            "premultiply-alpha-frag",
            "lod-terrain-vert",
            "lod-terrain-frag",
            "clouds-vert",
--- a/voxygen/src/render/texture.rs
+++ b/voxygen/src/render/texture.rs
@ -136,8 +136,8 @@ impl Texture {
            address_mode_u: wgpu::AddressMode::ClampToEdge,
            address_mode_v: wgpu::AddressMode::ClampToEdge,
            address_mode_w: wgpu::AddressMode::ClampToEdge,
-            mag_filter: wgpu::FilterMode::Nearest,
-            min_filter: wgpu::FilterMode::Nearest,
+            mag_filter: wgpu::FilterMode::Linear,
+            min_filter: wgpu::FilterMode::Linear,
            mipmap_filter: wgpu::FilterMode::Nearest,
            ..Default::default()
        };
@ -224,6 +224,7 @@ impl Texture {
        );
    }

+    // TODO: remove `get` from this name
    /// Get dimensions of the represented image.
    pub fn get_dimensions(&self) -> vek::Vec3<u32> {
        vek::Vec3::new(
--- a/voxygen/src/scene/mod.rs
+++ b/voxygen/src/scene/mod.rs
@ -1232,9 +1232,9 @@ impl Scene {
    pub fn global_bind_group(&self) -> &GlobalsBindGroup { &self.globals_bind_group }

    /// Render the scene using the provided `Drawer`.
-    pub fn render<'a>(
-        &'a self,
-        drawer: &mut Drawer<'a>,
+    pub fn render(
+        &self,
+        drawer: &mut Drawer<'_>,
        state: &State,
        viewpoint_entity: EcsEntity,
        tick: u64,
--- a/voxygen/src/session/mod.rs
+++ b/voxygen/src/session/mod.rs
@ -1920,7 +1920,7 @@ impl PlayState for SessionState {
    /// Render the session to the screen.
    ///
    /// This method should be called once per frame.
-    fn render<'a>(&'a self, drawer: &mut Drawer<'a>, settings: &Settings) {
+    fn render(&self, drawer: &mut Drawer<'_>, settings: &Settings) {
        span!(_guard, "render", "<Session as PlayState>::render");

        let client = self.client.borrow();
--- a/voxygen/src/ui/cache.rs
+++ b/voxygen/src/ui/cache.rs
@ -7,6 +7,9 @@ use conrod_core::{text::GlyphCache, widget::Id};
 use hashbrown::HashMap;
 use vek::*;

+// TODO: probably make cache fields where we have mut getters into just public
+// fields
+
 // Multiplied by current window size
 const GLYPH_CACHE_SIZE: u32 = 1;
 // Glyph cache tolerances
@ -51,7 +54,9 @@ impl Cache {
        })
    }

-    pub fn glyph_cache_tex(&self) -> &(Texture, UiTextureBindGroup) { &self.glyph_cache_tex }
+    pub fn glyph_cache_tex(&self) -> (&Texture, &UiTextureBindGroup) {
+        (&self.glyph_cache_tex.0, &self.glyph_cache_tex.1)
+    }

    pub fn cache_mut_and_tex(
        &mut self,
--- a/voxygen/src/ui/graphic/mod.rs
+++ b/voxygen/src/ui/graphic/mod.rs
--- a/voxygen/src/ui/graphic/pixel_art.rs
+++ b/voxygen/src/ui/graphic/pixel_art.rs
@ -11,10 +11,13 @@ const EPSILON: f32 = 0.0001;

 // Averaging colors with alpha such that when blending with the background color
 // the same color will be produced as when the individual colors were blended
-// with the background and then averaged
+// with the background and then averaged.
+//
 // Say we have two areas that we are combining to form a single pixel
 // A1 and A2 where these are the fraction of the area of the pixel each color
-// contributes to Then if the colors were opaque we would say that the final
+// contributes to.
+//
+// Then if the colors were opaque we would say that the final
 // color output color o3 is
 //     E1: o3 = A1 * o1 + A2 * o2
 // where o1 and o2 are the opaque colors of the two areas
@ -30,7 +33,7 @@ const EPSILON: f32 = 0.0001;
 //     E6: c3 * a3 = A1 * c1 * a1 + A2 * c2 * a2
 //     E7: b * (1 - a3) = A1 * b * (1 - a1) + A2 * b * (1 - a2)
 // dropping b from E7 and solving for a3
-//     E8: a3 = 1 - A1 * (1 - a1) + A2 * (1 - a2)
+//     E8: a3 = 1 - A1 * (1 - a1) - A2 * (1 - a2)
 // we can now calculate the combined alpha value
 // and E6 can then be solved for c3
 //     E9: c3 = (A1 * c1 * a1 + A2 * c2 * a2) / a3
--- a/voxygen/src/ui/graphic/renderer.rs
+++ b/voxygen/src/ui/graphic/renderer.rs
@ -14,7 +14,7 @@ pub enum SampleStrat {
    PixelCoverage,
 }

-#[derive(Clone)]
+#[derive(Clone, Copy)]
 pub struct Transform {
    pub ori: Quaternion<f32>,
    pub offset: Vec3<f32>,
--- a/voxygen/src/ui/ice/cache.rs
+++ b/voxygen/src/ui/ice/cache.rs
@ -8,6 +8,9 @@ use glyph_brush::GlyphBrushBuilder;
 use std::cell::{RefCell, RefMut};
 use vek::*;

+// TODO: probably make cache fields where we have mut getters into just public
+// fields
+
 // Multiplied by current window size
 const GLYPH_CACHE_SIZE: u32 = 1;
 // Glyph cache tolerances
@ -61,7 +64,9 @@ impl Cache {
        })
    }

-    pub fn glyph_cache_tex(&self) -> &(Texture, UiTextureBindGroup) { &self.glyph_cache_tex }
+    pub fn glyph_cache_tex(&self) -> (&Texture, &UiTextureBindGroup) {
+        (&self.glyph_cache_tex.0, &self.glyph_cache_tex.1)
+    }

    pub fn glyph_cache_mut_and_tex(&mut self) -> (&mut GlyphBrush, &(Texture, UiTextureBindGroup)) {
        (self.glyph_brush.get_mut(), &self.glyph_cache_tex)
--- a/voxygen/src/ui/ice/renderer/mod.rs
+++ b/voxygen/src/ui/ice/renderer/mod.rs
@ -533,7 +533,7 @@ impl IcedRenderer {
                }

                // Cache graphic at particular resolution.
-                let (uv_aabr, tex_id) = match graphic_cache.cache_res(
+                let (uv_aabr, scale, tex_id) = match graphic_cache.cache_res(
                    renderer,
                    pool,
                    graphic_id,
@ -543,7 +543,7 @@ impl IcedRenderer {
                    rotation,
                ) {
                    // TODO: get dims from graphic_cache (or have it return floats directly)
-                    Some((aabr, tex_id)) => {
+                    Some(((aabr, scale), tex_id)) => {
                        let cache_dims = graphic_cache
                            .get_tex(tex_id)
                            .0
@ -552,7 +552,7 @@ impl IcedRenderer {
                            .map(|e| e as f32);
                        let min = Vec2::new(aabr.min.x as f32, aabr.max.y as f32) / cache_dims;
                        let max = Vec2::new(aabr.max.x as f32, aabr.min.y as f32) / cache_dims;
-                        (Aabr { min, max }, tex_id)
+                        (Aabr { min, max }, scale, tex_id)
                    },
                    None => return,
                };
@ -562,7 +562,9 @@ impl IcedRenderer {
                self.switch_state(State::Image(tex_id));

                self.mesh
-                    .push_quad(create_ui_quad(gl_aabr, uv_aabr, color, UiMode::Image));
+                    .push_quad(create_ui_quad(gl_aabr, uv_aabr, color, UiMode::Image {
+                        scale,
+                    }));
            },
            Primitive::Gradient {
                bounds,
@ -789,7 +791,7 @@ impl IcedRenderer {
                        DrawKind::Image(tex_id) => self.cache.graphic_cache().get_tex(*tex_id),
                        DrawKind::Plain => self.cache.glyph_cache_tex(),
                    };
-                    drawer.draw(&tex.1, verts.clone()); // Note: trivial clone
+                    drawer.draw(tex.1, verts.clone()); // Note: trivial clone
                },
            }
        }
--- a/voxygen/src/ui/mod.rs
+++ b/voxygen/src/ui/mod.rs
@ -854,7 +854,7 @@ impl Ui {
                        srgba_to_linear(color.unwrap_or(conrod_core::color::WHITE).to_fsa().into());

                    // Cache graphic at particular resolution.
-                    let (uv_aabr, tex_id) = match graphic_cache.cache_res(
+                    let (uv_aabr, scale, tex_id) = match graphic_cache.cache_res(
                        renderer,
                        pool,
                        *graphic_id,
@ -863,7 +863,7 @@ impl Ui {
                        *rotation,
                    ) {
                        // TODO: get dims from graphic_cache (or have it return floats directly)
-                        Some((aabr, tex_id)) => {
+                        Some(((aabr, scale), tex_id)) => {
                            let cache_dims = graphic_cache
                                .get_tex(tex_id)
                                .0
@ -872,7 +872,7 @@ impl Ui {
                                .map(|e| e as f32);
                            let min = Vec2::new(aabr.min.x as f32, aabr.max.y as f32) / cache_dims;
                            let max = Vec2::new(aabr.max.x as f32, aabr.min.y as f32) / cache_dims;
-                            (Aabr { min, max }, tex_id)
+                            (Aabr { min, max }, scale, tex_id)
                        },
                        None => continue,
                    };
@ -897,10 +897,10 @@ impl Ui {

                    mesh.push_quad(create_ui_quad(gl_aabr, uv_aabr, color, match *rotation {
                        Rotation::None | Rotation::Cw90 | Rotation::Cw180 | Rotation::Cw270 => {
-                            UiMode::Image
+                            UiMode::Image { scale }
                        },
-                        Rotation::SourceNorth => UiMode::ImageSourceNorth,
-                        Rotation::TargetNorth => UiMode::ImageTargetNorth,
+                        Rotation::SourceNorth => UiMode::ImageSourceNorth { scale },
+                        Rotation::TargetNorth => UiMode::ImageTargetNorth { scale },
                    }));
                },
                PrimitiveKind::Text { .. } => {
@ -1073,7 +1073,7 @@ impl Ui {
                        DrawKind::Image(tex_id) => self.cache.graphic_cache().get_tex(*tex_id),
                        DrawKind::Plain => self.cache.glyph_cache_tex(),
                    };
-                    drawer.draw(&tex.1, verts.clone()); // Note: trivial clone
+                    drawer.draw(tex.1, verts.clone()); // Note: trivial clone
                },
            }
        }