Use the fast-srgb8 crate to efficiently convert between non-linear sRGB u8 and
linear f32 values when performing alpha premultiplication on the CPU.
2024-08-30 18:12:32 +00:00 · 2022-09-03 16:21:50 -04:00
parent eb6d16b02c
commit f62c2cde70
4 changed files with 30 additions and 10 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1959,6 +1959,12 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"

+[[package]]
+name = "fast-srgb8"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd2e7510819d6fbf51a5545c8f922716ecfb14df168a3242f7d33e0239efe6a1"
+
 [[package]]
 name = "fastrand"
 version = "1.8.0"
@@ -7008,6 +7014,7 @@ dependencies = [
 "enum-iterator 1.1.3",
 "etagere",
 "euc",
+ "fast-srgb8",
 "gilrs",
 "glyph_brush",
 "guillotiere",
--- a/voxygen/Cargo.toml
+++ b/voxygen/Cargo.toml
@@ -134,6 +134,7 @@ num_cpus = "1.0"
 # vec_map = { version = "0.8.2" }
 inline_tweak = "1.0.2"
 itertools = "0.10.0"
+fast-srgb8 = "1.0.0"

 # Tracy
 tracing = "0.1"
--- a/voxygen/src/lib.rs
+++ b/voxygen/src/lib.rs
@@ -5,6 +5,7 @@
 #![feature(
    array_methods,
    array_zip,
+    array_from_fn,
    drain_filter,
    once_cell,
    trait_alias,
--- a/voxygen/src/ui/graphic/mod.rs
+++ b/voxygen/src/ui/graphic/mod.rs
@@ -584,25 +584,36 @@ fn create_image(
 }

 fn premultiply_alpha(image: &mut RgbaImage) {
+    use fast_srgb8::{f32x4_to_srgb8, srgb8_to_f32};
    // S-TODO: temp remove me
-    // TODO: check with minimap
-    // TODO: log image size
-    // TODO: benchmark
-    common_base::prof_span!("premultiply_alpha");
+    // TODO: benchmark (29 ns per pixel)
    tracing::error!("{:?}", image.dimensions());
+    common_base::prof_span!("premultiply_alpha");
    use common::util::{linear_to_srgba, srgba_to_linear};
    image.pixels_mut().for_each(|pixel| {
        let alpha = pixel.0[3];
-        if alpha == 0 && pixel.0 != [0; 4] {
+        // With fast path checks, longest image was 16 ms with current assets.
+        // Without longest is 60 ms. (but not the same image!)
+        if alpha == 0 {
            pixel.0 = [0; 4];
        } else if alpha != 255 {
            // Convert to linear, multiply color components by alpha, and convert back to
            // non-linear.
-            let linear = srgba_to_linear(Rgba::from(pixel.0).map(|e: u8| e as f32 / 255.0));
-            let premultiplied = Rgba::from_translucent(Rgb::from(linear) * linear.a, linear.a);
-            pixel.0 = linear_to_srgba(premultiplied)
-                .map(|e| (e * 255.0) as u8)
-                .into_array();
+            let linear = Rgba::new(
+                srgb8_to_f32(pixel.0[0]),
+                srgb8_to_f32(pixel.0[1]),
+                srgb8_to_f32(pixel.0[2]),
+                alpha as f32 / 255.0,
+            );
+            let converted = fast_srgb8::f32x4_to_srgb8([
+                linear.r * linear.a,
+                linear.g * linear.a,
+                linear.b * linear.a,
+                0.0,
+            ]);
+            pixel.0[0] = converted[0];
+            pixel.0[1] = converted[1];
+            pixel.0[2] = converted[2];
        }
    })
 }